
removed tflite-lib

CaCO3 3 years ago
parent commit 75a653a5c7
100 changed files with 0 additions and 14179 deletions
  1. +0 -22  code/components/tflite-lib/tensorflow/lite/builtin_op_data.h
  2. +0 -525  code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h
  3. +0 -130  code/components/tflite-lib/tensorflow/lite/c/c_api_types.h
  4. +0 -38  code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc
  5. +0 -59  code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h
  6. +0 -68  code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc
  7. +0 -140  code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h
  8. +0 -50  code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc
  9. +0 -28  code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h
  10. +0 -102  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h
  11. +0 -52  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc
  12. +0 -50  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h
  13. +0 -70  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc
  14. +0 -34  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h
  15. +0 -134  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c
  16. +0 -63  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h
  17. +0 -220  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c
  18. +0 -50  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h
  19. +0 -72  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c
  20. +0 -64  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h
  21. +0 -85  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c
  22. +0 -52  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h
  23. +0 -48  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h
  24. +0 -33  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h
  25. +0 -30  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c
  26. +0 -40  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h
  27. +0 -83  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c
  28. +0 -39  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h
  29. +0 -27  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c
  30. +0 -45  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h
  31. +0 -51  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c
  32. +0 -46  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h
  33. +0 -45  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c
  34. +0 -50  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h
  35. +0 -56  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c
  36. +0 -47  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h
  37. +0 -92  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
  38. +0 -57  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h
  39. +0 -70  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c
  40. +0 -49  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h
  41. +0 -73  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c
  42. +0 -45  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h
  43. +0 -1180  code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h
  44. +0 -122  code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h
  45. +0 -40  code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h
  46. +0 -35  code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h
  47. +0 -35  code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h
  48. +0 -20  code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h
  49. +0 -122  code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h
  50. +0 -484  code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h
  51. +0 -416  code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc
  52. +0 -292  code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h
  53. +0 -400  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h
  54. +0 -86  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h
  55. +0 -88  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h
  56. +0 -275  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h
  57. +0 -101  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h
  58. +0 -91  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h
  59. +0 -56  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h
  60. +0 -97  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h
  61. +0 -37  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h
  62. +0 -280  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h
  63. +0 -141  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h
  64. +0 -287  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h
  65. +0 -175  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h
  66. +0 -79  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h
  67. +0 -100  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
  68. +0 -319  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
  69. +0 -78  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h
  70. +0 -247  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h
  71. +0 -37  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h
  72. +0 -38  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h
  73. +0 -38  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h
  74. +0 -39  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h
  75. +0 -35  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h
  76. +0 -44  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h
  77. +0 -323  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h
  78. +0 -145  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
  79. +0 -238  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
  80. +0 -291  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
  81. +0 -201  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
  82. +0 -67  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
  83. +0 -121  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
  84. +0 -79  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
  85. +0 -133  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
  86. +0 -264  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
  87. +0 -117  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
  88. +0 -224  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
  89. +0 -90  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h
  90. +0 -69  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h
  91. +0 -256  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h
  92. +0 -132  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h
  93. +0 -422  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h
  94. +0 -64  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h
  95. +0 -37  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h
  96. +0 -169  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h
  97. +0 -303  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h
  98. +0 -809  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
  99. +0 -333  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h
  100. +0 -244  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h

+ 0 - 22
code/components/tflite-lib/tensorflow/lite/builtin_op_data.h

@@ -1,22 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Compatibility shim for new location of interface definitions.
-
-#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
-#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-
-#endif  // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_

+ 0 - 525
code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h

@@ -1,525 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
-#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
-
-#include <stdint.h>
-
-#include "tensorflow/lite/c/common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
-// number of dimensions.
-#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
-
-// TODO(aselle): Consider using "if this then that" for testing.
-
-// Useful placeholder to put in otherwise empty structs to avoid size warnings.
-typedef struct {
-  char dummy;
-} EmptyStructPlaceholder;
-
-// IMPORTANT: All new members of structs must be added at the end to ensure
-// backwards compatibility.
-
-// Possible padding types (for convolutions)
-typedef enum {
-  kTfLitePaddingUnknown = 0,
-  kTfLitePaddingSame,
-  kTfLitePaddingValid,
-} TfLitePadding;
-
-typedef enum {
-  kTfLiteMirrorPaddingUnknown = 0,
-  kTfLiteMirrorPaddingReflect,
-  kTfLiteMirrorPaddingSymmetric,
-} TfLiteMirrorPaddingMode;
-
-// TODO(b/130259536): We should move this out of builtin_op_data.
-typedef struct {
-  int width;
-  int height;
-  int width_offset;
-  int height_offset;
-} TfLitePaddingValues;
-
-typedef struct {
-  TfLiteMirrorPaddingMode mode;
-} TfLiteMirrorPaddingParams;
-
-// Possible fused activation functions.
-typedef enum {
-  kTfLiteActNone = 0,
-  kTfLiteActRelu,
-  kTfLiteActReluN1To1,  // min(max(-1, x), 1)
-  kTfLiteActRelu6,      // min(max(0, x), 6)
-  kTfLiteActTanh,
-  kTfLiteActSignBit,
-  kTfLiteActSigmoid,
-} TfLiteFusedActivation;
-
-typedef struct {
-  // Parameters for CONV_2D version 1.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  TfLiteFusedActivation activation;
-
-  // Parameters for CONV_2D version 2.
-  // Note: Version 2 supports dilation values not equal to 1.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteConvParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int stride_depth;
-  int dilation_width_factor;
-  int dilation_height_factor;
-  int dilation_depth_factor;
-  TfLiteFusedActivation activation;
-} TfLiteConv3DParams;
-
-typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int filter_width;
-  int filter_height;
-  TfLiteFusedActivation activation;
-  struct {
-    TfLitePaddingValues padding;
-  } computed;
-} TfLitePoolParams;
-
-typedef struct {
-  // Parameters for DepthwiseConv version 1 or above.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  // `depth_multiplier` is redundant. It's used by CPU kernels in
-  // TensorFlow 2.0 or below, but ignored in versions above.
-  //
-  // The information can be deduced from the shape of input and the shape of
-  // weights. Since the TFLiteConverter toolchain doesn't support partially
-  // specified shapes, relying on `depth_multiplier` stops us from supporting
-  // graphs with dynamic shape tensors.
-  //
-  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
-  // field.
-  int depth_multiplier;
-  TfLiteFusedActivation activation;
-  // Parameters for DepthwiseConv version 2 or above.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteDepthwiseConvParams;
-
-typedef struct {
-  int rank;
-  TfLiteFusedActivation activation;
-
-  // Parameter for SVDF version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteSVDFParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-
-  // Parameter for RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-
-  // Parameter for Sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteSequenceRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-  bool merge_outputs;
-
-  // Parameter for Bidirectional RNN verison 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceRNNParams;
-
-typedef enum {
-  kTfLiteFullyConnectedWeightsFormatDefault = 0,
-  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
-} TfLiteFullyConnectedWeightsFormat;
-
-typedef struct {
-  // Parameters for FullyConnected version 1 or above.
-  TfLiteFusedActivation activation;
-
-  // Parameters for FullyConnected version 2 or above.
-  TfLiteFullyConnectedWeightsFormat weights_format;
-
-  // Parameters for FullyConnected version 5 or above.
-  // If set to true, then the number of dimensions in the input and the output
-  // tensors are the same. Furthermore, all but the last dimension of the input
-  // and output shapes will be equal.
-  bool keep_num_dims;
-
-  // Parameters for FullyConnected version 7 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteFullyConnectedParams;
-
-typedef enum {
-  kTfLiteLshProjectionUnknown = 0,
-  kTfLiteLshProjectionSparse = 1,
-  kTfLiteLshProjectionDense = 2,
-} TfLiteLSHProjectionType;
-
-typedef struct {
-  TfLiteLSHProjectionType type;
-} TfLiteLSHProjectionParams;
-
-typedef struct {
-  float beta;
-} TfLiteSoftmaxParams;
-
-typedef struct {
-  int axis;
-  TfLiteFusedActivation activation;
-} TfLiteConcatenationParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 4.
-  bool pot_scale_int16;
-} TfLiteAddParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteSpaceToBatchNDParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteBatchToSpaceNDParams;
-
-typedef struct {
-  bool adj_x;
-  bool adj_y;
-  // Parameters for BatchMatMul version 4 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteBatchMatMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 5.
-  bool pot_scale_int16;
-} TfLiteSubParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteDivParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteL2NormParams;
-
-typedef struct {
-  int radius;
-  float bias;
-  float alpha;
-  float beta;
-} TfLiteLocalResponseNormParams;
-
-typedef enum {
-  kTfLiteLSTMFullKernel = 0,
-  kTfLiteLSTMBasicKernel
-} TfLiteLSTMKernelType;
-
-typedef struct {
-  // Parameters for LSTM version 1.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // Parameters for LSTM version 2.
-  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
-  TfLiteLSTMKernelType kernel_type;
-
-  // Parameters for LSTM version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteLSTMParams;
-
-typedef struct {
-  // Parameters needed for the underlying LSTM.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameter for unidirectional sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteUnidirectionalSequenceLSTMParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  // Parameters inherited for the LSTM kernel.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If true, store the outputs of both directions in the first output.
-  bool merge_outputs;
-
-  // Parameters supported by version 2:
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameters supported by version 4:
-  // If set to true, then hybrid ops use asymmetric quantization for inputs.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceLSTMParams;
-
-typedef struct {
-  bool align_corners;
-  // half_pixel_centers assumes pixels are of half the actual dimensions, and
-  // yields more accurate resizes. Corresponds to the same argument for the
-  // original TensorFlow op in TF2.0.
-  bool half_pixel_centers;
-} TfLiteResizeBilinearParams;
-
-typedef struct {
-  bool align_corners;
-  bool half_pixel_centers;
-} TfLiteResizeNearestNeighborParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadV2Params;
-
-typedef struct {
-  // These fields are only used in old models for backward compatibility.
-  // In the current implementation, we use the 2nd input of the op as the shape,
-  // and these fields are unused.
-  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
-  int num_dimensions;
-} TfLiteReshapeParams;
-
-typedef struct {
-  int ngram_size;
-  int max_skip_size;
-  bool include_all_ngrams;
-} TfLiteSkipGramParams;
-
-typedef struct {
-  int block_size;
-} TfLiteSpaceToDepthParams;
-
-typedef struct {
-  int block_size;
-} TfLiteDepthToSpaceParams;
-
-typedef struct {
-  TfLiteType in_data_type;
-  TfLiteType out_data_type;
-} TfLiteCastParams;
-
-typedef enum {
-  kTfLiteCombinerTypeSum = 0,
-  kTfLiteCombinerTypeMean = 1,
-  kTfLiteCombinerTypeSqrtn = 2,
-} TfLiteCombinerType;
-
-typedef struct {
-  TfLiteCombinerType combiner;
-} TfLiteEmbeddingLookupSparseParams;
-
-typedef struct {
-  int axis;
-  int batch_dims;
-} TfLiteGatherParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteTransposeParams;
-
-typedef struct {
-  bool keep_dims;
-} TfLiteReducerParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitVParams;
-
-typedef struct {
-  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
-  // For now we will fix the maximum possible number of dimensions.
-  int squeeze_dims[8];
-  int num_squeeze_dims;
-} TfLiteSqueezeParams;
-
-typedef struct {
-  int begin_mask;
-  int end_mask;
-  int ellipsis_mask;
-  int new_axis_mask;
-  int shrink_axis_mask;
-} TfLiteStridedSliceParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMaxParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMinParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-} TfLiteTransposeConvParams;
-
-typedef struct {
-  bool validate_indices;
-} TfLiteSparseToDenseParams;
-
-typedef struct {
-  TfLiteType out_type;
-} TfLiteShapeParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteRankParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  float min;
-  float max;
-  int num_bits;
-
-  // Parameters supported by version 2:
-  bool narrow_range;
-} TfLiteFakeQuantParams;
-
-typedef struct {
-  int values_count;
-  int axis;
-} TfLitePackParams;
-
-typedef struct {
-  int axis;
-} TfLiteOneHotParams;
-
-typedef struct {
-  int num;
-  int axis;
-} TfLiteUnpackParams;
-
-typedef struct {
-  float alpha;
-} TfLiteLeakyReluParams;
-
-typedef struct {
-  TfLiteType index_out_type;
-} TfLiteUniqueParams;
-
-typedef struct {
-  int seq_dim;
-  int batch_dim;
-} TfLiteReverseSequenceParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixDiagParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixSetDiagParams;
-
-typedef struct {
-  int then_subgraph_index;
-  int else_subgraph_index;
-} TfLiteIfParams;
-
-typedef struct {
-  int cond_subgraph_index;
-  int body_subgraph_index;
-} TfLiteWhileParams;
-
-typedef struct {
-  bool exclusive;
-  bool reverse;
-} TfLiteCumsumParams;
-
-typedef struct {
-  int init_subgraph_index;
-} TfLiteCallOnceParams;
-
-typedef struct {
-  int table_id;
-  TfLiteType key_dtype;
-  TfLiteType value_dtype;
-} TfLiteHashtableParams;
-
-typedef struct {
-  const char* container;
-  const char* shared_name;
-} TfLiteVarHandleParams;
-
-typedef struct {
-  int seed;
-  int seed2;
-} TfLiteRandomParams;
-
-typedef struct {
-  int num_boundaries;
-  // This points to the memory stored in the model (flatbuffer),
-  // and is not owned.
-  const float* boundaries;
-} TfLiteBucketizeParams;
-
-typedef struct {
-  bool approximate;
-} TfLiteGeluParams;
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-
-#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_

+ 0 - 130
code/components/tflite-lib/tensorflow/lite/c/c_api_types.h

@@ -1,130 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This file declares types used by the pure C inference API defined in c_api.h,
-// some of which are also used in the C++ and C kernel and interpreter APIs.
-
-#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_
-#define TENSORFLOW_LITE_C_C_API_TYPES_H_
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
-// library.
-#ifdef SWIG
-#define TFL_CAPI_EXPORT
-#elif defined(TFL_STATIC_LIBRARY_BUILD)
-#define TFL_CAPI_EXPORT
-#else  // not definded TFL_STATIC_LIBRARY_BUILD
-#if defined(_WIN32)
-#ifdef TFL_COMPILE_LIBRARY
-#define TFL_CAPI_EXPORT __declspec(dllexport)
-#else
-#define TFL_CAPI_EXPORT __declspec(dllimport)
-#endif  // TFL_COMPILE_LIBRARY
-#else
-#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
-#endif  // _WIN32
-#endif  // SWIG
-
-// Note that new error status values may be added in future in order to
-// indicate more fine-grained internal states, therefore, applications should
-// not rely on status values being members of the enum.
-typedef enum TfLiteStatus {
-  kTfLiteOk = 0,
-
-  // Generally referring to an error in the runtime (i.e. interpreter)
-  kTfLiteError = 1,
-
-  // Generally referring to an error from a TfLiteDelegate itself.
-  kTfLiteDelegateError = 2,
-
-  // Generally referring to an error in applying a delegate due to
-  // incompatibility between runtime and delegate, e.g., this error is returned
-  // when trying to apply a TF Lite delegate onto a model graph that's already
-  // immutable.
-  kTfLiteApplicationError = 3,
-
-  // Generally referring to serialized delegate data not being found.
-  // See tflite::delegates::Serialization.
-  kTfLiteDelegateDataNotFound = 4,
-
-  // Generally referring to data-writing issues in delegate serialization.
-  // See tflite::delegates::Serialization.
-  kTfLiteDelegateDataWriteError = 5,
-
-  // Generally referring to data-reading issues in delegate serialization.
-  // See tflite::delegates::Serialization.
-  kTfLiteDelegateDataReadError = 6,
-
-  // Generally referring to issues when the TF Lite model has ops that cannot be
-  // resolved at runtime. This could happen when the specific op is not
-  // registered or built with the TF Lite framework.
-  kTfLiteUnresolvedOps = 7,
-} TfLiteStatus;
-
-// Types supported by tensor
-typedef enum {
-  kTfLiteNoType = 0,
-  kTfLiteFloat32 = 1,
-  kTfLiteInt32 = 2,
-  kTfLiteUInt8 = 3,
-  kTfLiteInt64 = 4,
-  kTfLiteString = 5,
-  kTfLiteBool = 6,
-  kTfLiteInt16 = 7,
-  kTfLiteComplex64 = 8,
-  kTfLiteInt8 = 9,
-  kTfLiteFloat16 = 10,
-  kTfLiteFloat64 = 11,
-  kTfLiteComplex128 = 12,
-  kTfLiteUInt64 = 13,
-  kTfLiteResource = 14,
-  kTfLiteVariant = 15,
-  kTfLiteUInt32 = 16,
-  kTfLiteUInt16 = 17,
-} TfLiteType;
-
-// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
-// If per-layer quantization is specified this field will still be populated in
-// addition to TfLiteAffineQuantization.
-// Parameters for asymmetric quantization. Quantized values can be converted
-// back to float using:
-//     real_value = scale * (quantized_value - zero_point)
-typedef struct TfLiteQuantizationParams {
-  float scale;
-  int32_t zero_point;
-} TfLiteQuantizationParams;
-
-// --------------------------------------------------------------------------
-// Opaque types used by c_api.h, c_api_opaque.h and common.h.
-
-// TfLiteOpaqueContext is an opaque version of TfLiteContext;
-typedef struct TfLiteOpaqueContext TfLiteOpaqueContext;
-
-// TfLiteOpaqueNode is an opaque version of TfLiteNode;
-typedef struct TfLiteOpaqueNode TfLiteOpaqueNode;
-
-// TfLiteOpaqueTensor is an opaque version of TfLiteTensor;
-typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor;
-
-#ifdef __cplusplus
-}  // extern C
-#endif
-#endif  // TENSORFLOW_LITE_C_C_API_TYPES_H_
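
For reference, the removed c_api_types.h documents asymmetric quantization as real_value = scale * (quantized_value - zero_point). Below is a minimal, self-contained sketch of that conversion; the QuantizationParams struct only mirrors the fields of TfLiteQuantizationParams for illustration, and the numeric values are made up, not taken from this commit.

    #include <cstdint>
    #include <cstdio>

    // Mirrors the fields of TfLiteQuantizationParams from the removed header.
    struct QuantizationParams {
      float scale;
      int32_t zero_point;
    };

    // real_value = scale * (quantized_value - zero_point)
    float Dequantize(int8_t quantized_value, const QuantizationParams& params) {
      return params.scale *
             (static_cast<int32_t>(quantized_value) - params.zero_point);
    }

    int main() {
      QuantizationParams params{0.5f, -128};  // example values only
      // 0.5 * (-28 - (-128)) = 50.0
      std::printf("%f\n", Dequantize(-28, params));
      return 0;
    }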

+ 0 - 38
code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc

@@ -1,38 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include <cstdarg>
-
-namespace tflite {
-
-int ErrorReporter::Report(const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  int code = Report(format, args);
-  va_end(args);
-  return code;
-}
-
-// TODO(aselle): Make the name of ReportError on context the same, so
-// we can use the ensure functions w/o a context and w/ a reporter.
-int ErrorReporter::ReportError(void*, const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  int code = Report(format, args);
-  va_end(args);
-  return code;
-}
-
-}  // namespace tflite

+ 0 - 59
code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h

@@ -1,59 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
-#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
-
-#include <cstdarg>
-
-namespace tflite {
-
-/// A functor that reports error to supporting system. Invoked similar to
-/// printf.
-///
-/// Usage:
-///  ErrorReporter foo;
-///  foo.Report("test %d", 5);
-/// or
-///  va_list args;
-///  foo.Report("test %d", args); // where args is va_list
-///
-/// Subclass ErrorReporter to provide another reporting destination.
-/// For example, if you have a GUI program, you might redirect to a buffer
-/// that drives a GUI error log box.
-class ErrorReporter {
- public:
-  virtual ~ErrorReporter() {}
-  virtual int Report(const char* format, va_list args) = 0;
-  int Report(const char* format, ...);
-  int ReportError(void*, const char* format, ...);
-};
-
-}  // namespace tflite
-
-// You should not make bare calls to the error reporter, instead use the
-// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
-// stripped when the binary size has to be optimized. If you are looking to
-// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
-// every call will be stubbed out, taking no memory.
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_REPORT_ERROR(reporter, ...)                             \
-  do {                                                                  \
-    static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
-  } while (false)
-#else  // TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_REPORT_ERROR(reporter, ...)
-#endif  // TF_LITE_STRIP_ERROR_STRINGS
-
-#endif  // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
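
The doc comment in the removed error_reporter.h describes subclassing ErrorReporter and preferring the TF_LITE_REPORT_ERROR macro over direct calls. A minimal sketch of that pattern, assuming the removed header is still available on the include path; StderrReporter is a hypothetical name, not something defined in this repository.

    #include <cstdarg>
    #include <cstdio>

    #include "tensorflow/lite/core/api/error_reporter.h"  // removed by this commit

    // Hypothetical reporter that forwards formatted messages to stderr.
    class StderrReporter : public tflite::ErrorReporter {
     public:
      int Report(const char* format, va_list args) override {
        return std::vfprintf(stderr, format, args);
      }
    };

    int main() {
      StderrReporter reporter;
      // Preferred over calling Report() directly: the macro compiles away
      // when TF_LITE_STRIP_ERROR_STRINGS is defined.
      TF_LITE_REPORT_ERROR(&reporter, "test %d\n", 5);
      return 0;
    }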

+ 0 - 68
code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc

@@ -1,68 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/core/api/op_resolver.h"
-
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_utils.h"
-
-namespace tflite {
-
-TfLiteStatus GetRegistrationFromOpCode(
-    const OperatorCode* opcode, const OpResolver& op_resolver,
-    ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
-  TfLiteStatus status = kTfLiteOk;
-  *registration = nullptr;
-  auto builtin_code = GetBuiltinCode(opcode);
-  int version = opcode->version();
-
-  if (builtin_code > BuiltinOperator_MAX) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter,
-        "Op builtin_code out of range: %d. Are you using old TFLite binary "
-        "with newer model?",
-        builtin_code);
-    status = kTfLiteError;
-  } else if (builtin_code != BuiltinOperator_CUSTOM) {
-    *registration = op_resolver.FindOp(builtin_code, version);
-    if (*registration == nullptr) {
-      TF_LITE_REPORT_ERROR(
-          error_reporter,
-          "Didn't find op for builtin opcode '%s' version '%d'. "
-          "An older version of this builtin might be supported. "
-          "Are you using an old TFLite binary with a newer model?\n",
-          EnumNameBuiltinOperator(builtin_code), version);
-      status = kTfLiteError;
-    }
-  } else if (!opcode->custom_code()) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter,
-        "Operator with CUSTOM builtin_code has no custom_code.\n");
-    status = kTfLiteError;
-  } else {
-    const char* name = opcode->custom_code()->c_str();
-    *registration = op_resolver.FindOp(name, version);
-    if (*registration == nullptr) {
-      // Do not report error for unresolved custom op, we do the final check
-      // while preparing ops.
-      status = kTfLiteError;
-    }
-  }
-  return status;
-}
-
-}  // namespace tflite

+ 0 - 140
code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h

@@ -1,140 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
-#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
-
-#include <functional>
-#include <memory>
-#include <vector>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-// Opaque type similar to TfLiteDelegate / TfLiteOpaqueDelegate.
-// This is used for cases (e.g. when using "TF Lite with Google Play Services")
-// where the TF Lite runtime might be built using a newer (or older)
-// version of the TF Lite sources than the app, and hence might have a
-// different definition of the TfLiteDelegate type. TF Lite APIs use
-// TfLiteOpaqueDelegate rather than TfLiteDelegate when they want to
-// refer to a delegate defined with that potentially different version
-// of the TfLiteDelegate type.
-struct TfLiteOpaqueDelegateStruct;
-
-namespace tflite {
-
-/// Abstract interface that returns TfLiteRegistrations given op codes or custom
-/// op names. This is the mechanism that ops being referenced in the flatbuffer
-/// model are mapped to executable function pointers (TfLiteRegistrations).
-class OpResolver {
- public:
-  /// Finds the op registration for a builtin operator by enum code.
-  virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
-                                           int version) const = 0;
-  /// Finds the op registration of a custom operator by op name.
-  virtual const TfLiteRegistration* FindOp(const char* op,
-                                           int version) const = 0;
-
-  // Represents a sequence of delegates.
-  using TfLiteDelegatePtrVector =
-      std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
-
-  // Returns optional delegates for resolving and handling ops in the flatbuffer
-  // model. This may be used in addition to the standard TfLiteRegistration
-  // lookup for graph resolution.
-  // WARNING: This API is deprecated, GetDelegateCreators is preferred.
-  virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
-    return {};
-  }
-
-  // Represents a function that creates a TfLite delegate instance.
-  using TfLiteDelegateCreator =
-      std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
-          int /*num_threads*/)>;
-
-  // Represents a sequence of delegate creator functions.
-  using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
-
-  // Returns a vector of delegate creators to create optional delegates for
-  // resolving and handling ops in the flatbuffer model. This may be used in
-  // addition to the standard TfLiteRegistration lookup for graph resolution.
-  //
-  // Note that this method is not used (will not be called) if you are using
-  // TF Lite in Google Play Services; the GetOpaqueDelegateCreators method
-  // (see below) is used for that case.
-  virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
-
-  // TODO(b/202712825): it would be nice if we could avoid the need for separate
-  // "opaque" types & methods for use only with TF Lite in Google Play Services.
-
-  // Represents an opaque delegate instance.
-  // WARNING: Experimental interface, subject to change.
-  using TfLiteOpaqueDelegatePtr =
-      std::unique_ptr<TfLiteOpaqueDelegateStruct,
-                      void (*)(TfLiteOpaqueDelegateStruct*)>;
-
-  // Represents a function that creates an opaque delegate instance.
-  // WARNING: Experimental interface, subject to change.
-  using TfLiteOpaqueDelegateCreator =
-      std::function<TfLiteOpaqueDelegatePtr(int /*num_threads*/)>;
-
-  // Represents a sequence of opaque delegate creator functions.
-  // WARNING: Experimental interface, subject to change.
-  using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
-
-  // Returns a vector of opaque delegate creators to create optional opaque
-  // delegates for resolving and handling ops in the flatbuffer model. This may
-  // be used in addition to the standard TfLiteRegistration lookup for graph
-  // resolution.
-  //
-  // Note that this method will be called only if you are using TF Lite in
-  // Google Play Services; if you are using regular TF Lite, GetDelegateCreators
-  // (see above) is used instead.
-  //
-  // WARNING: Experimental interface, subject to change.
-  virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const {
-    return {};
-  }
-
-  virtual ~OpResolver() {}
-
- private:
-  /// Returns true if this OpResolver may contain any "user defined" ops.
-  /// By "user defined" ops, we mean any op definitions other than those
-  /// contained in tflite::ops::builtin::BuiltinOpResolver.
-  ///
-  /// If this method returns true, it doesn't necessarily mean that the
-  /// OpResolver contains a user-defined op, just that the absence of
-  /// user-defined ops can't be guaranteed.
-  ///
-  /// Note that "user-defined" ops are not the same as "custom" ops;
-  /// BuiltinOpResolver may support certain "custom" ops, in addition to
-  /// "builtin" ops, and may not support all of the "builtin" op enum values.
-  virtual bool MayContainUserDefinedOps() const { return true; }
-
-  friend class OpResolverInternal;
-};
-
-// Handles the logic for converting between an OperatorCode structure extracted
-// from a flatbuffer and information about a registered operator
-// implementation.
-TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
-                                       const OpResolver& op_resolver,
-                                       ErrorReporter* error_reporter,
-                                       const TfLiteRegistration** registration);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
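
The removed op_resolver.h defines OpResolver as an abstract lookup from op codes or custom op names to TfLiteRegistration pointers. A minimal sketch of a conforming subclass, assuming the removed headers are on the include path; EmptyOpResolver is a hypothetical name and resolves nothing, so GetRegistrationFromOpCode() would return kTfLiteError for every op.

    #include "tensorflow/lite/core/api/op_resolver.h"  // removed by this commit

    // Hypothetical resolver that knows no ops at all; every lookup fails
    // until real registrations are added.
    class EmptyOpResolver : public tflite::OpResolver {
     public:
      const TfLiteRegistration* FindOp(tflite::BuiltinOperator /*op*/,
                                       int /*version*/) const override {
        return nullptr;
      }
      const TfLiteRegistration* FindOp(const char* /*op*/,
                                       int /*version*/) const override {
        return nullptr;
      }
    };

    int main() {
      EmptyOpResolver resolver;
      (void)resolver;  // would be passed to GetRegistrationFromOpCode() etc.
      return 0;
    }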

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc

@@ -1,50 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/core/api/tensor_utils.h"
-
-#include <string.h>
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
-  if (!tensor->is_variable) {
-    return kTfLiteOk;
-  }
-  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
-  // to the value of the buffer.
-  int value = 0;
-  if (tensor->type == kTfLiteInt8) {
-    value = tensor->params.zero_point;
-  }
-  // TODO(b/139446230): Provide a platform header to better handle these
-  // specific scenarios.
-#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
-    defined(__i386) || defined(__x86__) || defined(__X86__) || \
-    defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
-  memset(tensor->data.raw, value, tensor->bytes);
-#else
-  char* raw_ptr = tensor->data.raw;
-  for (size_t i = 0; i < tensor->bytes; ++i) {
-    *raw_ptr = value;
-    raw_ptr++;
-  }
-#endif
-  return kTfLiteOk;
-}
-
-}  // namespace tflite

+ 0 - 28
code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h

@@ -1,28 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-// Resets a variable tensor to the default value.
-TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_

+ 0 - 102
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h

@@ -1,102 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
-
-#ifdef __cplusplus
-#include <cstdint>
-
-extern "C" {
-#endif
-
-static inline int CountLeadingZeros32Slow(uint64_t n) {
-  int zeroes = 28;
-  if (n >> 16) zeroes -= 16, n >>= 16;
-  if (n >> 8) zeroes -= 8, n >>= 8;
-  if (n >> 4) zeroes -= 4, n >>= 4;
-  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
-}
-
-static inline int CountLeadingZeros32(uint32_t n) {
-#if defined(_MSC_VER)
-  unsigned long result = 0;  // NOLINT(runtime/int)
-  if (_BitScanReverse(&result, n)) {
-    return 31 - result;
-  }
-  return 32;
-#elif defined(__GNUC__)
-
-  // Handle 0 as a special case because __builtin_clz(0) is undefined.
-  if (n == 0) {
-    return 32;
-  }
-  return __builtin_clz(n);
-#else
-  return CountLeadingZeros32Slow(n);
-#endif
-}
-
-static inline int MostSignificantBit32(uint32_t n) {
-  return 32 - CountLeadingZeros32(n);
-}
-
-static inline int CountLeadingZeros64Slow(uint64_t n) {
-  int zeroes = 60;
-  if (n >> 32) zeroes -= 32, n >>= 32;
-  if (n >> 16) zeroes -= 16, n >>= 16;
-  if (n >> 8) zeroes -= 8, n >>= 8;
-  if (n >> 4) zeroes -= 4, n >>= 4;
-  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
-}
-
-static inline int CountLeadingZeros64(uint64_t n) {
-#if defined(_MSC_VER) && defined(_M_X64)
-  // MSVC does not have __builtin_clzll. Use _BitScanReverse64.
-  unsigned long result = 0;  // NOLINT(runtime/int)
-  if (_BitScanReverse64(&result, n)) {
-    return 63 - result;
-  }
-  return 64;
-#elif defined(_MSC_VER)
-  // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse
-  unsigned long result = 0;  // NOLINT(runtime/int)
-  if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
-    return 31 - result;
-  }
-  if (_BitScanReverse(&result, n)) {
-    return 63 - result;
-  }
-  return 64;
-#elif defined(__GNUC__)
-
-  // Handle 0 as a special case because __builtin_clzll(0) is undefined.
-  if (n == 0) {
-    return 64;
-  }
-  return __builtin_clzll(n);
-#else
-  return CountLeadingZeros64Slow(n);
-#endif
-}
-
-static inline int MostSignificantBit64(uint64_t n) {
-  return 64 - CountLeadingZeros64(n);
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
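
The removed bits.h defines MostSignificantBit32(n) as 32 - CountLeadingZeros32(n), i.e. the 1-based position of the highest set bit (0 when n is 0). A small self-contained check of that identity, assuming the removed header is still reachable on the include path:

    #include <cstdint>
    #include <cstdio>

    #include "tensorflow/lite/experimental/microfrontend/lib/bits.h"  // removed by this commit

    int main() {
      std::printf("%d\n", MostSignificantBit32(0));     // 0 (special case)
      std::printf("%d\n", MostSignificantBit32(1));     // 1
      std::printf("%d\n", MostSignificantBit32(0x10));  // 5: bit 4 is the highest set bit
      return 0;
    }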

+ 0 - 52
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc

@@ -1,52 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-
-#include <string.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
-
-void FftCompute(struct FftState* state, const int16_t* input,
-                int input_scale_shift) {
-  const size_t input_size = state->input_size;
-  const size_t fft_size = state->fft_size;
-
-  int16_t* fft_input = state->input;
-  // First, scale the input by the given shift.
-  size_t i;
-  for (i = 0; i < input_size; ++i) {
-    fft_input[i] = static_cast<int16_t>(static_cast<uint16_t>(input[i])
-                                        << input_scale_shift);
-  }
-  // Zero out whatever else remains in the top part of the input.
-  for (; i < fft_size; ++i) {
-    fft_input[i] = 0;
-  }
-
-  // Apply the FFT.
-  kissfft_fixed16::kiss_fftr(
-      reinterpret_cast<kissfft_fixed16::kiss_fftr_cfg>(state->scratch),
-      state->input,
-      reinterpret_cast<kissfft_fixed16::kiss_fft_cpx*>(state->output));
-}
-
-void FftInit(struct FftState* state) {
-  // All the initialization is done in FftPopulateState()
-}
-
-void FftReset(struct FftState* state) {
-  memset(state->input, 0, state->fft_size * sizeof(*state->input));
-  memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
-}

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h

@@ -1,50 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct complex_int16_t {
-  int16_t real;
-  int16_t imag;
-};
-
-struct FftState {
-  int16_t* input;
-  struct complex_int16_t* output;
-  size_t fft_size;
-  size_t input_size;
-  void* scratch;
-  size_t scratch_size;
-};
-
-void FftCompute(struct FftState* state, const int16_t* input,
-                int input_scale_shift);
-
-void FftInit(struct FftState* state);
-
-void FftReset(struct FftState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_

+ 0 - 70
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc

@@ -1,70 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
-
-#include <stdio.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
-
-int FftPopulateState(struct FftState* state, size_t input_size) {
-  state->input_size = input_size;
-  state->fft_size = 1;
-  while (state->fft_size < state->input_size) {
-    state->fft_size <<= 1;
-  }
-
-  state->input = reinterpret_cast<int16_t*>(
-      malloc(state->fft_size * sizeof(*state->input)));
-  if (state->input == nullptr) {
-    fprintf(stderr, "Failed to alloc fft input buffer\n");
-    return 0;
-  }
-
-  state->output = reinterpret_cast<complex_int16_t*>(
-      malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
-  if (state->output == nullptr) {
-    fprintf(stderr, "Failed to alloc fft output buffer\n");
-    return 0;
-  }
-
-  // Ask kissfft how much memory it wants.
-  size_t scratch_size = 0;
-  kissfft_fixed16::kiss_fftr_cfg kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(
-      state->fft_size, 0, nullptr, &scratch_size);
-  if (kfft_cfg != nullptr) {
-    fprintf(stderr, "Kiss memory sizing failed.\n");
-    return 0;
-  }
-  state->scratch = malloc(scratch_size);
-  if (state->scratch == nullptr) {
-    fprintf(stderr, "Failed to alloc fft scratch buffer\n");
-    return 0;
-  }
-  state->scratch_size = scratch_size;
-  // Let kissfft configure the scratch space we just allocated
-  kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(state->fft_size, 0,
-                                              state->scratch, &scratch_size);
-  if (kfft_cfg != state->scratch) {
-    fprintf(stderr, "Kiss memory preallocation strategy failed.\n");
-    return 0;
-  }
-  return 1;
-}
-
-void FftFreeStateContents(struct FftState* state) {
-  free(state->input);
-  free(state->output);
-  free(state->scratch);
-}
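
Taken together, the removed fft.h and fft_util.cc describe a simple lifecycle: FftPopulateState() rounds the FFT length up to the next power of two and allocates the input, output, and scratch buffers; FftCompute() scales the samples and runs the real FFT into state.output; FftFreeStateContents() releases the buffers. A hedged sketch of that lifecycle, assuming the removed microfrontend headers are on the include path; the frame length of 320 samples is only an illustrative choice.

    #include <cstdint>

    #include "tensorflow/lite/experimental/microfrontend/lib/fft.h"       // removed by this commit
    #include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"  // removed by this commit

    int main() {
      struct FftState state;
      int16_t samples[320] = {0};  // hypothetical 20 ms frame at 16 kHz

      // Rounds the FFT size up to the next power of two (512 here) and
      // allocates input/output/scratch buffers; returns 0 on failure.
      if (!FftPopulateState(&state, 320)) {
        return 1;
      }
      FftInit(&state);

      // Shift the input left by 0 bits and run the real FFT; the complex
      // result is written to state.output (fft_size / 2 + 1 bins).
      FftCompute(&state, samples, /*input_scale_shift=*/0);

      FftFreeStateContents(&state);
      return 0;
    }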

+ 0 - 34
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h

@@ -1,34 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Prepares and FFT for the given input size.
-int FftPopulateState(struct FftState* state, size_t input_size);
-
-// Frees any allocated buffers.
-void FftFreeStateContents(struct FftState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_

+ 0 - 134
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c

@@ -1,134 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
-
-#include <string.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
-                                         struct complex_int16_t* fft_output,
-                                         int32_t* energy) {
-  const int end_index = state->end_index;
-  int i;
-  energy += state->start_index;
-  fft_output += state->start_index;
-  for (i = state->start_index; i < end_index; ++i) {
-    const int32_t real = fft_output->real;
-    const int32_t imag = fft_output->imag;
-    fft_output++;
-    const uint32_t mag_squared = (real * real) + (imag * imag);
-    *energy++ = mag_squared;
-  }
-}
-
-void FilterbankAccumulateChannels(struct FilterbankState* state,
-                                  const int32_t* energy) {
-  uint64_t* work = state->work;
-  uint64_t weight_accumulator = 0;
-  uint64_t unweight_accumulator = 0;
-
-  const int16_t* channel_frequency_starts = state->channel_frequency_starts;
-  const int16_t* channel_weight_starts = state->channel_weight_starts;
-  const int16_t* channel_widths = state->channel_widths;
-
-  int num_channels_plus_1 = state->num_channels + 1;
-  int i;
-  for (i = 0; i < num_channels_plus_1; ++i) {
-    const int32_t* magnitudes = energy + *channel_frequency_starts++;
-    const int16_t* weights = state->weights + *channel_weight_starts;
-    const int16_t* unweights = state->unweights + *channel_weight_starts++;
-    const int width = *channel_widths++;
-    int j;
-    for (j = 0; j < width; ++j) {
-      weight_accumulator += *weights++ * ((uint64_t)*magnitudes);
-      unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes);
-      ++magnitudes;
-    }
-    *work++ = weight_accumulator;
-    weight_accumulator = unweight_accumulator;
-    unweight_accumulator = 0;
-  }
-}
-
-static uint16_t Sqrt32(uint32_t num) {
-  if (num == 0) {
-    return 0;
-  }
-  uint32_t res = 0;
-  int max_bit_number = 32 - MostSignificantBit32(num);
-  max_bit_number |= 1;
-  uint32_t bit = 1U << (31 - max_bit_number);
-  int iterations = (31 - max_bit_number) / 2 + 1;
-  while (iterations--) {
-    if (num >= res + bit) {
-      num -= res + bit;
-      res = (res >> 1U) + bit;
-    } else {
-      res >>= 1U;
-    }
-    bit >>= 2U;
-  }
-  // Do rounding - if we have the bits.
-  if (num > res && res != 0xFFFF) {
-    ++res;
-  }
-  return res;
-}
-
-static uint32_t Sqrt64(uint64_t num) {
-  // Take a shortcut and just use 32 bit operations if the upper word is all
-  // clear. This will cause a slight off by one issue for numbers close to 2^32,
-  // but it probably isn't going to matter (and gives us a big performance win).
-  if ((num >> 32) == 0) {
-    return Sqrt32((uint32_t)num);
-  }
-  uint64_t res = 0;
-  int max_bit_number = 64 - MostSignificantBit64(num);
-  max_bit_number |= 1;
-  uint64_t bit = 1ULL << (63 - max_bit_number);
-  int iterations = (63 - max_bit_number) / 2 + 1;
-  while (iterations--) {
-    if (num >= res + bit) {
-      num -= res + bit;
-      res = (res >> 1U) + bit;
-    } else {
-      res >>= 1U;
-    }
-    bit >>= 2U;
-  }
-  // Do rounding - if we have the bits.
-  if (num > res && res != 0xFFFFFFFFLL) {
-    ++res;
-  }
-  return res;
-}
-
-uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
-  const int num_channels = state->num_channels;
-  const uint64_t* work = state->work + 1;
-  // Reuse the work buffer since we're fine clobbering it at this point to hold
-  // the output.
-  uint32_t* output = (uint32_t*)state->work;
-  int i;
-  for (i = 0; i < num_channels; ++i) {
-    *output++ = Sqrt64(*work++) >> scale_down_shift;
-  }
-  return (uint32_t*)state->work;
-}
-
-void FilterbankReset(struct FilterbankState* state) {
-  memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
-}
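
The accumulation loop above realizes triangular mel filters without storing them explicitly. Each FFT bin k sits between two adjacent mel centers and contributes w_k * E_k to the channel centered just below it and (1 - w_k) * E_k (the "unweight") to the channel centered just above, with both factors quantized to Q12 per kFilterbankBits. Carrying unweight_accumulator into the next channel's weight_accumulator therefore gives

  work[c] = sum over bins in band c-1 of (1 - w_k) * E_k  +  sum over bins in band c of w_k * E_k,

which is the complete triangle centered on channel c - 1. That is also why FilterbankSqrt reads from work + 1: work[0] only holds the half-filter anchored at the lower band limit.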

+ 0 - 63
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h

@@ -1,63 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-
-#define kFilterbankBits 12
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FilterbankState {
-  int num_channels;
-  int start_index;
-  int end_index;
-  int16_t* channel_frequency_starts;
-  int16_t* channel_weight_starts;
-  int16_t* channel_widths;
-  int16_t* weights;
-  int16_t* unweights;
-  uint64_t* work;
-};
-
-// Converts the relevant complex values of an FFT output into energy (the
-// square magnitude).
-void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
-                                         struct complex_int16_t* fft_output,
-                                         int32_t* energy);
-
-// Computes the mel-scale filterbank on the given energy array. Output is cached
-// internally - to fetch it, you need to call FilterbankSqrt.
-void FilterbankAccumulateChannels(struct FilterbankState* state,
-                                  const int32_t* energy);
-
-// Applies an integer square root to the 64 bit intermediate values of the
-// filterbank, and returns a pointer to them. Memory will be invalidated the
-// next time FilterbankAccumulateChannels is called.
-uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
-
-void FilterbankReset(struct FilterbankState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_

+ 0 - 220
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c

@@ -1,220 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-
-#define kFilterbankIndexAlignment 4
-#define kFilterbankChannelBlockSize 4
-
-void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
-  config->num_channels = 32;
-  config->lower_band_limit = 125.0f;
-  config->upper_band_limit = 7500.0f;
-  config->output_scale_shift = 7;
-}
-
-static float FreqToMel(float freq) { return 1127.0 * log1p(freq / 700.0); }
-
-static void CalculateCenterFrequencies(const int num_channels,
-                                       const float lower_frequency_limit,
-                                       const float upper_frequency_limit,
-                                       float* center_frequencies) {
-  assert(lower_frequency_limit >= 0.0f);
-  assert(upper_frequency_limit > lower_frequency_limit);
-
-  const float mel_low = FreqToMel(lower_frequency_limit);
-  const float mel_hi = FreqToMel(upper_frequency_limit);
-  const float mel_span = mel_hi - mel_low;
-  const float mel_spacing = mel_span / ((float)num_channels);
-  int i;
-  for (i = 0; i < num_channels; ++i) {
-    center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
-  }
-}
-
-static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
-                                      int16_t* unweight) {
-  *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
-  *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
-}
-
-int FilterbankPopulateState(const struct FilterbankConfig* config,
-                            struct FilterbankState* state, int sample_rate,
-                            int spectrum_size) {
-  state->num_channels = config->num_channels;
-  const int num_channels_plus_1 = config->num_channels + 1;
-
-  // How should we align things to index counts given the byte alignment?
-  const int index_alignment =
-      (kFilterbankIndexAlignment < sizeof(int16_t)
-           ? 1
-           : kFilterbankIndexAlignment / sizeof(int16_t));
-
-  state->channel_frequency_starts =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
-  state->channel_weight_starts =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
-  state->channel_widths =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
-  state->work = malloc(num_channels_plus_1 * sizeof(*state->work));
-
-  float* center_mel_freqs =
-      malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
-  int16_t* actual_channel_starts =
-      malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
-  int16_t* actual_channel_widths =
-      malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
-
-  if (state->channel_frequency_starts == NULL ||
-      state->channel_weight_starts == NULL || state->channel_widths == NULL ||
-      center_mel_freqs == NULL || actual_channel_starts == NULL ||
-      actual_channel_widths == NULL) {
-    free(center_mel_freqs);
-    free(actual_channel_starts);
-    free(actual_channel_widths);
-    fprintf(stderr, "Failed to allocate channel buffers\n");
-    return 0;
-  }
-
-  CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
-                             config->upper_band_limit, center_mel_freqs);
-
-  // Always exclude DC.
-  const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1);
-  state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
-  state->end_index = 0;  // Initialized to zero here, but actually set below.
-
-  // For each channel, we need to figure out what frequencies belong to it, and
-  // how much padding we need to add so that we can efficiently multiply the
-  // weights and unweights for accumulation. To simplify the multiplication
-  // logic, all channels will have some multiplication to do (even if there are
-  // no frequencies that accumulate to that channel) - they will be directed to
-  // a set of zero weights.
-  int chan_freq_index_start = state->start_index;
-  int weight_index_start = 0;
-  int needs_zeros = 0;
-
-  int chan;
-  for (chan = 0; chan < num_channels_plus_1; ++chan) {
-    // Keep jumping frequencies until we overshoot the bound on this channel.
-    int freq_index = chan_freq_index_start;
-    while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) {
-      ++freq_index;
-    }
-
-    const int width = freq_index - chan_freq_index_start;
-    actual_channel_starts[chan] = chan_freq_index_start;
-    actual_channel_widths[chan] = width;
-
-    if (width == 0) {
-      // This channel doesn't actually get anything from the frequencies, it's
-      // always zero. We need then to insert some 'zero' weights into the
-      // output, and just redirect this channel to do a single multiplication at
-      // this point. For simplicity, the zeros are placed at the beginning of
-      // the weights arrays, so we have to go and update all the other
-      // weight_starts to reflect this shift (but only once).
-      state->channel_frequency_starts[chan] = 0;
-      state->channel_weight_starts[chan] = 0;
-      state->channel_widths[chan] = kFilterbankChannelBlockSize;
-      if (!needs_zeros) {
-        needs_zeros = 1;
-        int j;
-        for (j = 0; j < chan; ++j) {
-          state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
-        }
-        weight_index_start += kFilterbankChannelBlockSize;
-      }
-    } else {
-      // How far back do we need to go to ensure that we have the proper
-      // alignment?
-      const int aligned_start =
-          (chan_freq_index_start / index_alignment) * index_alignment;
-      const int aligned_width = (chan_freq_index_start - aligned_start + width);
-      const int padded_width =
-          (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
-          kFilterbankChannelBlockSize;
-
-      state->channel_frequency_starts[chan] = aligned_start;
-      state->channel_weight_starts[chan] = weight_index_start;
-      state->channel_widths[chan] = padded_width;
-      weight_index_start += padded_width;
-    }
-    chan_freq_index_start = freq_index;
-  }
-
-  // Allocate the two arrays to store the weights - weight_index_start contains
-  // the index of what would be the next set of weights that we would need to
-  // add, so that's how many weights we need to allocate.
-  state->weights = calloc(weight_index_start, sizeof(*state->weights));
-  state->unweights = calloc(weight_index_start, sizeof(*state->unweights));
-
-  // If the alloc failed, we also need to nuke the arrays.
-  if (state->weights == NULL || state->unweights == NULL) {
-    free(center_mel_freqs);
-    free(actual_channel_starts);
-    free(actual_channel_widths);
-    fprintf(stderr, "Failed to allocate weights or unweights\n");
-    return 0;
-  }
-
-  // Next pass, compute all the weights. Since everything has been memset to
-  // zero, we only need to fill in the weights that correspond to some frequency
-  // for a channel.
-  const float mel_low = FreqToMel(config->lower_band_limit);
-  for (chan = 0; chan < num_channels_plus_1; ++chan) {
-    int frequency = actual_channel_starts[chan];
-    const int num_frequencies = actual_channel_widths[chan];
-    const int frequency_offset =
-        frequency - state->channel_frequency_starts[chan];
-    const int weight_start = state->channel_weight_starts[chan];
-    const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];
-
-    int j;
-    for (j = 0; j < num_frequencies; ++j, ++frequency) {
-      const float weight =
-          (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
-          (center_mel_freqs[chan] - denom_val);
-
-      // Make the float into an integer for the weights (and unweights).
-      const int weight_index = weight_start + frequency_offset + j;
-      QuantizeFilterbankWeights(weight, state->weights + weight_index,
-                                state->unweights + weight_index);
-    }
-    if (frequency > state->end_index) {
-      state->end_index = frequency;
-    }
-  }
-
-  free(center_mel_freqs);
-  free(actual_channel_starts);
-  free(actual_channel_widths);
-  if (state->end_index >= spectrum_size) {
-    fprintf(stderr, "Filterbank end_index is above spectrum size.\n");
-    return 0;
-  }
-  return 1;
-}
-
-void FilterbankFreeStateContents(struct FilterbankState* state) {
-  free(state->channel_frequency_starts);
-  free(state->channel_weight_starts);
-  free(state->channel_widths);
-  free(state->weights);
-  free(state->unweights);
-  free(state->work);
-}
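
FreqToMel above implements the usual mel approximation m(f) = 1127 * ln(1 + f/700), and CalculateCenterFrequencies spaces the num_channels + 1 centers evenly on that mel axis between the band limits. With the defaults (125 Hz to 7500 Hz, 32 channels) that works out to roughly m(125) ≈ 185 mel and m(7500) ≈ 2773 mel, i.e. about 81 mel between adjacent centers; the figures are approximate and only meant to illustrate the spacing.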

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h

@@ -1,50 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FilterbankConfig {
-  // number of frequency channel buckets for filterbank
-  int num_channels;
-  // maximum frequency to include
-  float upper_band_limit;
-  // minimum frequency to include
-  float lower_band_limit;
-  // unused
-  int output_scale_shift;
-};
-
-// Fills the FilterbankConfig with "sane" defaults.
-void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);
-
-// Allocates any buffers.
-int FilterbankPopulateState(const struct FilterbankConfig* config,
-                            struct FilterbankState* state, int sample_rate,
-                            int spectrum_size);
-
-// Frees any allocated buffers.
-void FilterbankFreeStateContents(struct FilterbankState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_

+ 0 - 72
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c

@@ -1,72 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
-                                             const int16_t* samples,
-                                             size_t num_samples,
-                                             size_t* num_samples_read) {
-  struct FrontendOutput output;
-  output.values = NULL;
-  output.size = 0;
-
-  // Try to apply the window - if it fails, return and wait for more data.
-  if (!WindowProcessSamples(&state->window, samples, num_samples,
-                            num_samples_read)) {
-    return output;
-  }
-
-  // Apply the FFT to the window's output (and scale it so that the fixed point
-  // FFT can have as much resolution as possible).
-  int input_shift =
-      15 - MostSignificantBit32(state->window.max_abs_output_value);
-  FftCompute(&state->fft, state->window.output, input_shift);
-
-  // We can re-use the fft's output buffer to hold the energy.
-  int32_t* energy = (int32_t*)state->fft.output;
-
-  FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
-                                      energy);
-
-  FilterbankAccumulateChannels(&state->filterbank, energy);
-  uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);
-
-  // Apply noise reduction.
-  NoiseReductionApply(&state->noise_reduction, scaled_filterbank);
-
-  if (state->pcan_gain_control.enable_pcan) {
-    PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
-  }
-
-  // Apply the log and scale.
-  int correction_bits =
-      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
-  uint16_t* logged_filterbank =
-      LogScaleApply(&state->log_scale, scaled_filterbank,
-                    state->filterbank.num_channels, correction_bits);
-
-  output.size = state->filterbank.num_channels;
-  output.values = logged_filterbank;
-  return output;
-}
-
-void FrontendReset(struct FrontendState* state) {
-  WindowReset(&state->window);
-  FftReset(&state->fft);
-  FilterbankReset(&state->filterbank);
-  NoiseReductionReset(&state->noise_reduction);
-}

+ 0 - 64
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h

@@ -1,64 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FrontendState {
-  struct WindowState window;
-  struct FftState fft;
-  struct FilterbankState filterbank;
-  struct NoiseReductionState noise_reduction;
-  struct PcanGainControlState pcan_gain_control;
-  struct LogScaleState log_scale;
-};
-
-struct FrontendOutput {
-  const uint16_t* values;
-  size_t size;
-};
-
-// Main entry point to processing frontend samples. Updates num_samples_read to
-// contain the number of samples that have been consumed from the input array.
-// Returns a struct containing the generated output. If not enough samples were
-// added to generate a feature vector, the returned size will be 0 and the
-// values pointer will be NULL. Note that the output pointer will be invalidated
-// as soon as FrontendProcessSamples is called again, so copy the contents
-// elsewhere if you need to use them later.
-struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
-                                             const int16_t* samples,
-                                             size_t num_samples,
-                                             size_t* num_samples_read);
-
-void FrontendReset(struct FrontendState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
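
The block comment above spells out the streaming contract of FrontendProcessSamples; a consumer loop would look roughly like the sketch below, where ProcessFeatureVector is a hypothetical callback and the surrounding state setup is assumed to exist (see the frontend_util.h sketch further below):

#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"

/* Streaming sketch: feed a PCM buffer through the frontend in chunks. */
static void FeedAudio(struct FrontendState* state,
                      const int16_t* audio, size_t audio_size) {
  const int16_t* cursor = audio;
  size_t remaining = audio_size;
  while (remaining > 0) {
    size_t consumed = 0;
    struct FrontendOutput out =
        FrontendProcessSamples(state, cursor, remaining, &consumed);
    cursor += consumed;
    remaining -= consumed;
    if (out.size > 0) {
      /* out.values only lives until the next call: use or copy it here. */
      ProcessFeatureVector(out.values, out.size);  /* hypothetical consumer */
    }
  }
}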

+ 0 - 85
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c

@@ -1,85 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
-  WindowFillConfigWithDefaults(&config->window);
-  FilterbankFillConfigWithDefaults(&config->filterbank);
-  NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
-  PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
-  LogScaleFillConfigWithDefaults(&config->log_scale);
-}
-
-int FrontendPopulateState(const struct FrontendConfig* config,
-                          struct FrontendState* state, int sample_rate) {
-  memset(state, 0, sizeof(*state));
-
-  if (!WindowPopulateState(&config->window, &state->window, sample_rate)) {
-    fprintf(stderr, "Failed to populate window state\n");
-    return 0;
-  }
-
-  if (!FftPopulateState(&state->fft, state->window.size)) {
-    fprintf(stderr, "Failed to populate fft state\n");
-    return 0;
-  }
-  FftInit(&state->fft);
-
-  if (!FilterbankPopulateState(&config->filterbank, &state->filterbank,
-                               sample_rate, state->fft.fft_size / 2 + 1)) {
-    fprintf(stderr, "Failed to populate filterbank state\n");
-    return 0;
-  }
-
-  if (!NoiseReductionPopulateState(&config->noise_reduction,
-                                   &state->noise_reduction,
-                                   state->filterbank.num_channels)) {
-    fprintf(stderr, "Failed to populate noise reduction state\n");
-    return 0;
-  }
-
-  int input_correction_bits =
-      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
-  if (!PcanGainControlPopulateState(
-          &config->pcan_gain_control, &state->pcan_gain_control,
-          state->noise_reduction.estimate, state->filterbank.num_channels,
-          state->noise_reduction.smoothing_bits, input_correction_bits)) {
-    fprintf(stderr, "Failed to populate pcan gain control state\n");
-    return 0;
-  }
-
-  if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) {
-    fprintf(stderr, "Failed to populate log scale state\n");
-    return 0;
-  }
-
-  FrontendReset(state);
-
-  // All good, return a true value.
-  return 1;
-}
-
-void FrontendFreeStateContents(struct FrontendState* state) {
-  WindowFreeStateContents(&state->window);
-  FftFreeStateContents(&state->fft);
-  FilterbankFreeStateContents(&state->filterbank);
-  NoiseReductionFreeStateContents(&state->noise_reduction);
-  PcanGainControlFreeStateContents(&state->pcan_gain_control);
-}

+ 0 - 52
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h

@@ -1,52 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FrontendConfig {
-  struct WindowConfig window;
-  struct FilterbankConfig filterbank;
-  struct NoiseReductionConfig noise_reduction;
-  struct PcanGainControlConfig pcan_gain_control;
-  struct LogScaleConfig log_scale;
-};
-
-// Fills the FrontendConfig with "sane" defaults.
-void FrontendFillConfigWithDefaults(struct FrontendConfig* config);
-
-// Allocates any buffers.
-int FrontendPopulateState(const struct FrontendConfig* config,
-                          struct FrontendState* state, int sample_rate);
-
-// Frees any allocated buffers.
-void FrontendFreeStateContents(struct FrontendState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
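
For completeness, the setup implied by this header is fill-defaults, optionally tweak, populate, and later free. A minimal sketch, in which the 16 kHz sample rate and the 40-channel override are illustrative values rather than anything taken from this repository:

#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"

/* Returns non-zero on success; FrontendPopulateState prints its own
   diagnostic to stderr on failure. */
static int SetUpFrontend(struct FrontendState* state) {
  struct FrontendConfig config;
  FrontendFillConfigWithDefaults(&config);
  config.filterbank.num_channels = 40;  /* example override of a default */
  return FrontendPopulateState(&config, state, 16000);  /* 16 kHz assumed */
}
/* ...drive FrontendProcessSamples in a loop, then call
   FrontendFreeStateContents(state) when done. */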

+ 0 - 48
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h

@@ -1,48 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
-
-// This header file should be included in all variants of kiss_fft_$type.{h,cc}
-// so that their sub-included source files do not mistakenly wrap libc header
-// files within their kissfft_$type namespaces.
-// E.g., this header avoids kissfft_int16.h containing:
-//   namespace kiss_fft_int16 {
-//     #include "kiss_fft.h"
-//   }
-// where kiss_fft.h contains:
-//   #include <math.h>
-//
-// TRICK: By including the following header files here, their preprocessor
-// header guards prevent them being re-defined inside of the kiss_fft_$type
-// namespaces declared within the kiss_fft_$type.{h,cc} sources.
-// Note that the original kiss_fft*.h files are untouched since they
-// may be used in libraries that include them directly.
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef FIXED_POINT
-#include <sys/types.h>
-#endif
-
-#ifdef USE_SIMD
-#include <xmmintrin.h>
-#endif
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_

+ 0 - 33
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h

@@ -1,33 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h"
-
-// Wrap 16-bit kiss fft in its own namespace. Enables us to link an application
-// with different kiss fft resolutions (16/32 bit integer, float, double)
-// without getting a linker error.
-#define FIXED_POINT 16
-namespace kissfft_fixed16 {
-#include "kiss_fft.h"
-#include "tools/kiss_fftr.h"
-}  // namespace kissfft_fixed16
-#undef FIXED_POINT
-#undef kiss_fft_scalar
-#undef KISS_FFT_H
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_

+ 0 - 30
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c

@@ -1,30 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
-const uint16_t kLogLut[]
-#ifndef _MSC_VER
-    __attribute__((aligned(4)))
-#endif  // _MSC_VER
-    = {0,    224,  442,  654,  861,  1063, 1259, 1450, 1636, 1817, 1992, 2163,
-       2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
-       3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
-       5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
-       5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
-       5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
-       5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
-       4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
-       3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
-       2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
-       1094, 963,  830,  695,  559,  421,  282,  142,  0,    0};

+ 0 - 40
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h

@@ -1,40 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Number of segments in the log lookup table. The table will be kLogSegments+1
-// in length (with some padding).
-#define kLogSegments 128
-#define kLogSegmentsLog2 7
-
-// Scale used by lookup table.
-#define kLogScale 65536
-#define kLogScaleLog2 16
-#define kLogCoeff 45426
-
-extern const uint16_t kLogLut[];
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_

+ 0 - 83
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c

@@ -1,83 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
-
-#define kuint16max 0x0000FFFF
-
-// The following functions implement integer logarithms of various sizes. The
-// approximation is calculated according to the method described in
-//       www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
-//       publicaciones/SPL2007/Log10-spl07.pdf
-// It first calculates log2 of the input and then converts it to natural
-// logarithm.
-
-static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
-  // Part 1
-  int32_t frac = x - (1LL << log2x);
-  if (log2x < kLogScaleLog2) {
-    frac <<= kLogScaleLog2 - log2x;
-  } else {
-    frac >>= log2x - kLogScaleLog2;
-  }
-  // Part 2
-  const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
-  const uint32_t seg_unit =
-      (((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2;
-
-  const int32_t c0 = kLogLut[base_seg];
-  const int32_t c1 = kLogLut[base_seg + 1];
-  const int32_t seg_base = seg_unit * base_seg;
-  const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
-  return frac + c0 + rel_pos;
-}
-
-static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
-  const uint32_t integer = MostSignificantBit32(x) - 1;
-  const uint32_t fraction = Log2FractionPart(x, integer);
-  const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
-  const uint32_t round = kLogScale / 2;
-  const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2;
-  // Finally scale to our output scale
-  const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
-  return loge_scaled;
-}
-
-uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
-                        int signal_size, int correction_bits) {
-  const int scale_shift = state->scale_shift;
-  uint16_t* output = (uint16_t*)signal;
-  uint16_t* ret = output;
-  int i;
-  for (i = 0; i < signal_size; ++i) {
-    uint32_t value = *signal++;
-    if (state->enable_log) {
-      if (correction_bits < 0) {
-        value >>= -correction_bits;
-      } else {
-        value <<= correction_bits;
-      }
-      if (value > 1) {
-        value = Log(value, scale_shift);
-      } else {
-        value = 0;
-      }
-    }
-    *output++ = (value < kuint16max) ? value : kuint16max;
-  }
-  return ret;
-}
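
The constants from log_lut.h tie Log() together: kLogScale = 2^16 is the fixed-point scale of the log2 value, and kLogCoeff = 45426 ≈ ln(2) * 2^16 (0.693147 * 65536 ≈ 45426). Log() first assembles log2(x) in Q16 as (MostSignificantBit32(x) - 1) << 16 plus the LUT-interpolated fraction, then multiplies by kLogCoeff and shifts right by kLogScaleLog2 to convert it to a natural logarithm before the output scale_shift is applied.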

+ 0 - 39
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h

@@ -1,39 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct LogScaleState {
-  int enable_log;
-  int scale_shift;
-};
-
-// Applies a fixed point logarithm to the signal and converts it to 16 bit. Note
-// that the signal array will be modified.
-uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
-                        int signal_size, int correction_bits);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_

+ 0 - 27
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c

@@ -1,27 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
-
-void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
-  config->enable_log = 1;
-  config->scale_shift = 6;
-}
-
-int LogScalePopulateState(const struct LogScaleConfig* config,
-                          struct LogScaleState* state) {
-  state->enable_log = config->enable_log;
-  state->scale_shift = config->scale_shift;
-  return 1;
-}

+ 0 - 45
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h

@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct LogScaleConfig {
-  // set to false (0) to disable this module
-  int enable_log;
-  // scale results by 2^(scale_shift)
-  int scale_shift;
-};
-
-// Populates the LogScaleConfig with "sane" default values.
-void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);
-
-// Allocates any buffers.
-int LogScalePopulateState(const struct LogScaleConfig* config,
-                          struct LogScaleState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_

+ 0 - 51
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c

@@ -1,51 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
-
-#include <string.h>
-
-void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
-  int i;
-  for (i = 0; i < state->num_channels; ++i) {
-    const uint32_t smoothing =
-        ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
-    const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;
-
-    // Update the estimate of the noise.
-    const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
-    uint32_t estimate =
-        (((uint64_t)signal_scaled_up * smoothing) +
-         ((uint64_t)state->estimate[i] * one_minus_smoothing)) >>
-        kNoiseReductionBits;
-    state->estimate[i] = estimate;
-
-    // Make sure that we can't get a negative value for the signal - estimate.
-    if (estimate > signal_scaled_up) {
-      estimate = signal_scaled_up;
-    }
-
-    const uint32_t floor =
-        ((uint64_t)signal[i] * state->min_signal_remaining) >>
-        kNoiseReductionBits;
-    const uint32_t subtracted =
-        (signal_scaled_up - estimate) >> state->smoothing_bits;
-    const uint32_t output = subtracted > floor ? subtracted : floor;
-    signal[i] = output;
-  }
-}
-
-void NoiseReductionReset(struct NoiseReductionState* state) {
-  memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
-}

+ 0 - 46
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h

@@ -1,46 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
-
-#define kNoiseReductionBits 14
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct NoiseReductionState {
-  int smoothing_bits;
-  uint16_t even_smoothing;
-  uint16_t odd_smoothing;
-  uint16_t min_signal_remaining;
-  int num_channels;
-  uint32_t* estimate;
-};
-
-// Removes stationary noise from each channel of the signal using a low pass
-// filter.
-void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);
-
-void NoiseReductionReset(struct NoiseReductionState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
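
Written out, the per-channel update in NoiseReductionApply is an exponential moving average with smoothing coefficient s (even_smoothing or odd_smoothing, stored as Q14 fractions per kNoiseReductionBits), computed on the input scaled up by smoothing_bits:

  estimate <- s * x_scaled + (1 - s) * estimate,
  output   =  max((x_scaled - estimate) >> smoothing_bits, min_signal_remaining * x),

so at least a min_signal_remaining fraction of the original signal always passes through, and setting min_signal_remaining to 1.0 effectively disables the reduction.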

+ 0 - 45
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c

@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
-
-#include <stdio.h>
-
-void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
-  config->smoothing_bits = 10;
-  config->even_smoothing = 0.025;
-  config->odd_smoothing = 0.06;
-  config->min_signal_remaining = 0.05;
-}
-
-int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
-                                struct NoiseReductionState* state,
-                                int num_channels) {
-  state->smoothing_bits = config->smoothing_bits;
-  state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
-  state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
-  state->min_signal_remaining =
-      config->min_signal_remaining * (1 << kNoiseReductionBits);
-  state->num_channels = num_channels;
-  state->estimate = calloc(state->num_channels, sizeof(*state->estimate));
-  if (state->estimate == NULL) {
-    fprintf(stderr, "Failed to alloc estimate buffer\n");
-    return 0;
-  }
-  return 1;
-}
-
-void NoiseReductionFreeStateContents(struct NoiseReductionState* state) {
-  free(state->estimate);
-}

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h

@@ -1,50 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct NoiseReductionConfig {
-  // scale the signal up by 2^(smoothing_bits) before reduction
-  int smoothing_bits;
-  // smoothing coefficient for even-numbered channels
-  float even_smoothing;
-  // smoothing coefficient for odd-numbered channels
-  float odd_smoothing;
-  // fraction of signal to preserve (1.0 disables this module)
-  float min_signal_remaining;
-};
-
-// Populates the NoiseReductionConfig with "sane" default values.
-void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);
-
-// Allocates any buffers.
-int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
-                                struct NoiseReductionState* state,
-                                int num_channels);
-
-// Frees any allocated buffers.
-void NoiseReductionFreeStateContents(struct NoiseReductionState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_

+ 0 - 56
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c

@@ -1,56 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
-  if (x <= 2) {
-    return lut[x];
-  }
-
-  const int16_t interval = MostSignificantBit32(x);
-  lut += 4 * interval - 6;
-
-  const int16_t frac =
-      ((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
-      0x3FF;
-
-  int32_t result = ((int32_t)lut[2] * frac) >> 5;
-  result += (int32_t)((uint32_t)lut[1] << 5);
-  result *= frac;
-  result = (result + (1 << 14)) >> 15;
-  result += lut[0];
-  return (int16_t)result;
-}
-
-uint32_t PcanShrink(const uint32_t x) {
-  if (x < (2 << kPcanSnrBits)) {
-    return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
-  } else {
-    return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
-  }
-}
-
-void PcanGainControlApply(struct PcanGainControlState* state,
-                          uint32_t* signal) {
-  int i;
-  for (i = 0; i < state->num_channels; ++i) {
-    const uint32_t gain =
-        WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
-    const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift;
-    signal[i] = PcanShrink(snr);
-  }
-}

+ 0 - 47
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h

@@ -1,47 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#define kPcanSnrBits 12
-#define kPcanOutputBits 6
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Details at https://research.google/pubs/pub45911.pdf
-struct PcanGainControlState {
-  int enable_pcan;
-  uint32_t* noise_estimate;
-  int num_channels;
-  int16_t* gain_lut;
-  int32_t snr_shift;
-};
-
-int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);
-
-uint32_t PcanShrink(const uint32_t x);
-
-void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_

+ 0 - 92
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c

@@ -1,92 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#define kint16max 0x00007FFF
-
-void PcanGainControlFillConfigWithDefaults(
-    struct PcanGainControlConfig* config) {
-  config->enable_pcan = 0;
-  config->strength = 0.95;
-  config->offset = 80.0;
-  config->gain_bits = 21;
-}
-
-int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
-                               int32_t input_bits, uint32_t x) {
-  const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits);
-  const float gain_as_float =
-      ((uint32_t)1 << config->gain_bits) *
-      powf(x_as_float + config->offset, -config->strength);
-
-  if (gain_as_float > kint16max) {
-    return kint16max;
-  }
-  return (int16_t)(gain_as_float + 0.5f);
-}
-
-int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
-                                 struct PcanGainControlState* state,
-                                 uint32_t* noise_estimate,
-                                 const int num_channels,
-                                 const uint16_t smoothing_bits,
-                                 const int32_t input_correction_bits) {
-  state->enable_pcan = config->enable_pcan;
-  if (!state->enable_pcan) {
-    return 1;
-  }
-  state->noise_estimate = noise_estimate;
-  state->num_channels = num_channels;
-  state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
-  if (state->gain_lut == NULL) {
-    fprintf(stderr, "Failed to allocate gain LUT\n");
-    return 0;
-  }
-  state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;
-
-  const int32_t input_bits = smoothing_bits - input_correction_bits;
-  state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
-  state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
-  state->gain_lut -= 6;
-  int interval;
-  for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
-    const uint32_t x0 = (uint32_t)1 << (interval - 1);
-    const uint32_t x1 = x0 + (x0 >> 1);
-    const uint32_t x2 =
-        (interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;
-
-    const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
-    const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
-    const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
-
-    const int32_t diff1 = (int32_t)y1 - y0;
-    const int32_t diff2 = (int32_t)y2 - y0;
-    const int32_t a1 = 4 * diff1 - diff2;
-    const int32_t a2 = diff2 - a1;
-
-    state->gain_lut[4 * interval] = y0;
-    state->gain_lut[4 * interval + 1] = (int16_t)a1;
-    state->gain_lut[4 * interval + 2] = (int16_t)a2;
-  }
-  state->gain_lut += 6;
-  return 1;
-}
-
-void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
-  free(state->gain_lut);
-}
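
The LUT built by PcanGainControlPopulateState above samples the gain curve gain(x) ~= (1 << gain_bits) * (x_scaled + offset)^(-strength) once per power-of-two interval of its input and stores three values per interval: the gain at the interval start plus two coefficients of a quadratic fit through the start, midpoint and end samples. A minimal float-domain sketch of what those three values encode (the removed code evaluates the same fit in fixed point inside WideDynamicFunction; the function name below is illustrative):

    #include <stdint.h>

    /* Sketch only: reconstruct the gain for an input x that falls in interval i,
     * where interval i covers [2^(i-1), 2^i) and `entry` points at the
     * (y0, a1, a2) triple stored for that interval. */
    static float PcanGainFromTriple(const int16_t* entry, uint32_t x, int interval) {
      const float x0 = (float)(1u << (interval - 1));
      const float t = ((float)x - x0) / x0;   /* 0 at interval start, 1 at end   */
      const float y0 = entry[0];              /* gain at x0                      */
      const float a1 = entry[1];              /* 4*(y_mid - y0) - (y_end - y0)   */
      const float a2 = entry[2];              /* (y_end - y0) - a1               */
      return y0 + a1 * t + a2 * t * t;        /* hits y_mid at t=0.5, y_end at 1 */
    }

With the defaults filled in above (strength 0.95, offset 80.0, gain_bits 21), this is a mildly compressive gain that falls off slightly more slowly than 1/x.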

+ 0 - 57
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h

@@ -1,57 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
-
-#define kWideDynamicFunctionBits 32
-#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct PcanGainControlConfig {
-  // set to false (0) to disable this module
-  int enable_pcan;
-  // gain normalization exponent (0.0 disables, 1.0 full strength)
-  float strength;
-  // positive value added in the normalization denominator
-  float offset;
-  // number of fractional bits in the gain
-  int gain_bits;
-};
-
-void PcanGainControlFillConfigWithDefaults(
-    struct PcanGainControlConfig* config);
-
-int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
-                               int32_t input_bits, uint32_t x);
-
-int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
-                                 struct PcanGainControlState* state,
-                                 uint32_t* noise_estimate,
-                                 const int num_channels,
-                                 const uint16_t smoothing_bits,
-                                 const int32_t input_correction_bits);
-
-void PcanGainControlFreeStateContents(struct PcanGainControlState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_

+ 0 - 70
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c

@@ -1,70 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
-
-#include <string.h>
-
-int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
-                         size_t num_samples, size_t* num_samples_read) {
-  const int size = state->size;
-
-  // Copy samples from the samples buffer over to our local input.
-  size_t max_samples_to_copy = state->size - state->input_used;
-  if (max_samples_to_copy > num_samples) {
-    max_samples_to_copy = num_samples;
-  }
-  memcpy(state->input + state->input_used, samples,
-         max_samples_to_copy * sizeof(*samples));
-  *num_samples_read = max_samples_to_copy;
-  state->input_used += max_samples_to_copy;
-
-  if (state->input_used < state->size) {
-    // We don't have enough samples to compute a window.
-    return 0;
-  }
-
-  // Apply the window to the input.
-  const int16_t* coefficients = state->coefficients;
-  const int16_t* input = state->input;
-  int16_t* output = state->output;
-  int i;
-  int16_t max_abs_output_value = 0;
-  for (i = 0; i < size; ++i) {
-    int16_t new_value =
-        (((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits;
-    *output++ = new_value;
-    if (new_value < 0) {
-      new_value = -new_value;
-    }
-    if (new_value > max_abs_output_value) {
-      max_abs_output_value = new_value;
-    }
-  }
-  // Shuffle the input down by the step size, and update how much we have used.
-  memmove(state->input, state->input + state->step,
-          sizeof(*state->input) * (state->size - state->step));
-  state->input_used -= state->step;
-  state->max_abs_output_value = max_abs_output_value;
-
-  // Indicate that the output buffer is valid for the next stage.
-  return 1;
-}
-
-void WindowReset(struct WindowState* state) {
-  memset(state->input, 0, state->size * sizeof(*state->input));
-  memset(state->output, 0, state->size * sizeof(*state->output));
-  state->input_used = 0;
-  state->max_abs_output_value = 0;
-}

+ 0 - 49
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h

@@ -1,49 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#define kFrontendWindowBits 12
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct WindowState {
-  size_t size;
-  int16_t* coefficients;
-  size_t step;
-
-  int16_t* input;
-  size_t input_used;
-  int16_t* output;
-  int16_t max_abs_output_value;
-};
-
-// Applies a window to the samples coming in, stepping forward at the given
-// rate.
-int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
-                         size_t num_samples, size_t* num_samples_read);
-
-void WindowReset(struct WindowState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
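
WindowProcessSamples above is a streaming call: it copies as many input samples as currently fit, reports how many it consumed through num_samples_read, and returns 1 once state->output holds a full windowed frame (the input buffer is then shifted down by step, so successive frames overlap). A minimal caller sketch against the deleted header; the FeedAudio name and the FFT hand-off comment are illustrative, not part of the removed code:

    #include "tensorflow/lite/experimental/microfrontend/lib/window.h"

    /* Sketch only: push an arbitrary chunk of audio through the windower. */
    static void FeedAudio(struct WindowState* state, const int16_t* audio,
                          size_t count) {
      while (count > 0) {
        size_t read = 0;
        const int frame_ready = WindowProcessSamples(state, audio, count, &read);
        audio += read;
        count -= read;
        if (frame_ready) {
          /* state->output now holds state->size windowed samples; the frontend
           * would hand them to the next stage (the FFT) before feeding more. */
        }
      }
    }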

+ 0 - 73
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c

@@ -1,73 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-// Some platforms don't have M_PI
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-void WindowFillConfigWithDefaults(struct WindowConfig* config) {
-  config->size_ms = 25;
-  config->step_size_ms = 10;
-}
-
-int WindowPopulateState(const struct WindowConfig* config,
-                        struct WindowState* state, int sample_rate) {
-  state->size = config->size_ms * sample_rate / 1000;
-  state->step = config->step_size_ms * sample_rate / 1000;
-
-  state->coefficients = malloc(state->size * sizeof(*state->coefficients));
-  if (state->coefficients == NULL) {
-    fprintf(stderr, "Failed to allocate window coefficients\n");
-    return 0;
-  }
-
-  // Populate the window values.
-  const float arg = M_PI * 2.0 / ((float)state->size);
-  int i;
-  for (i = 0; i < state->size; ++i) {
-    float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
-    // Scale it to fixed point and round it.
-    state->coefficients[i] =
-        floor(float_value * (1 << kFrontendWindowBits) + 0.5);
-  }
-
-  state->input_used = 0;
-  state->input = malloc(state->size * sizeof(*state->input));
-  if (state->input == NULL) {
-    fprintf(stderr, "Failed to allocate window input\n");
-    return 0;
-  }
-
-  state->output = malloc(state->size * sizeof(*state->output));
-  if (state->output == NULL) {
-    fprintf(stderr, "Failed to allocate window output\n");
-    return 0;
-  }
-
-  return 1;
-}
-
-void WindowFreeStateContents(struct WindowState* state) {
-  free(state->coefficients);
-  free(state->input);
-  free(state->output);
-}
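
Reading WindowPopulateState above concretely: the frame and step lengths are just size_ms * sample_rate / 1000, and the coefficients are a Hann window scaled into Q12 fixed point (kFrontendWindowBits is 12). A minimal sketch with the default config at a 16 kHz sample rate; the rate is only an example, nothing in the removed code defaults to it:

    #include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"

    int main(void) {
      struct WindowConfig config;
      struct WindowState state;
      WindowFillConfigWithDefaults(&config);        /* 25 ms frame, 10 ms step */
      if (!WindowPopulateState(&config, &state, 16000)) return 1;
      /* state.size == 25 * 16000 / 1000 == 400 samples,
       * state.step == 10 * 16000 / 1000 == 160 samples,
       * state.coefficients[i] ==
       *     floor((0.5 - 0.5 * cos(2 * pi * (i + 0.5) / 400)) * 4096 + 0.5). */
      WindowFreeStateContents(&state);
      return 0;
    }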

+ 0 - 45
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h

@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct WindowConfig {
-  // length of window frame in milliseconds
-  size_t size_ms;
-  // length of step for next frame in milliseconds
-  size_t step_size_ms;
-};
-
-// Populates the WindowConfig with "sane" default values.
-void WindowFillConfigWithDefaults(struct WindowConfig* config);
-
-// Allocates any buffers.
-int WindowPopulateState(const struct WindowConfig* config,
-                        struct WindowState* state, int sample_rate);
-
-// Frees any allocated buffers.
-void WindowFreeStateContents(struct WindowState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_

+ 0 - 1180
code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h

@@ -1,1180 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-
-#include <algorithm>
-#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#endif
-#endif
-
-#include <functional>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-constexpr int kReverseShift = -1;
-
-inline void GetActivationMinMax(FusedActivationFunctionType ac,
-                                float* output_activation_min,
-                                float* output_activation_max) {
-  switch (ac) {
-    case FusedActivationFunctionType::kNone:
-      *output_activation_min = std::numeric_limits<float>::lowest();
-      *output_activation_max = std::numeric_limits<float>::max();
-      break;
-    case FusedActivationFunctionType::kRelu:
-      *output_activation_min = 0.f;
-      *output_activation_max = std::numeric_limits<float>::max();
-      break;
-    case FusedActivationFunctionType::kRelu1:
-      *output_activation_min = -1.f;
-      *output_activation_max = 1.f;
-      break;
-    case FusedActivationFunctionType::kRelu6:
-      *output_activation_min = 0.f;
-      *output_activation_max = 6.f;
-      break;
-  }
-}
-
-template <typename T>
-inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
-                                      T output_activation_max) {
-  using std::max;
-  using std::min;
-  return min(max(x, output_activation_min), output_activation_max);
-}
-
-// Legacy function, left for compatibility only.
-template <FusedActivationFunctionType Ac>
-float ActivationFunction(float x) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  return ActivationFunctionWithMinMax(x, output_activation_min,
-                                      output_activation_max);
-}
-
-inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
-                         const float* bias_data, int array_size,
-                         float* array_data) {
-  if (bias_size == 0) return;
-  // Note: see b/132215220: in May 2019 we thought it would be OK to replace
-  // this with the Eigen one-liner:
-  //   return (array.colwise() + bias).cwiseMax(clamp_min).cwiseMin(clamp_max).
-  // This turned out to severely regress performance: +4ms (i.e. 8%) on
-  // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
-  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
-#ifdef USE_NEON
-  float* array_ptr = array_data;
-  float* array_end_ptr = array_ptr + array_size;
-  const auto clamp_min_vec = vdupq_n_f32(clamp_min);
-  const auto clamp_max_vec = vdupq_n_f32(clamp_max);
-  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
-    int i = 0;
-    for (; i <= bias_size - 16; i += 16) {
-      auto b0 = vld1q_f32(bias_data + i);
-      auto b1 = vld1q_f32(bias_data + i + 4);
-      auto b2 = vld1q_f32(bias_data + i + 8);
-      auto b3 = vld1q_f32(bias_data + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vaddq_f32(a0, b0);
-      auto x1 = vaddq_f32(a1, b1);
-      auto x2 = vaddq_f32(a2, b2);
-      auto x3 = vaddq_f32(a3, b3);
-      x0 = vmaxq_f32(clamp_min_vec, x0);
-      x1 = vmaxq_f32(clamp_min_vec, x1);
-      x2 = vmaxq_f32(clamp_min_vec, x2);
-      x3 = vmaxq_f32(clamp_min_vec, x3);
-      x0 = vminq_f32(clamp_max_vec, x0);
-      x1 = vminq_f32(clamp_max_vec, x1);
-      x2 = vminq_f32(clamp_max_vec, x2);
-      x3 = vminq_f32(clamp_max_vec, x3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= bias_size - 4; i += 4) {
-      auto b = vld1q_f32(bias_data + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vaddq_f32(a, b);
-      x = vmaxq_f32(clamp_min_vec, x);
-      x = vminq_f32(clamp_max_vec, x);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < bias_size; i++) {
-      array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
-                                                  clamp_min, clamp_max);
-    }
-  }
-#else  // not NEON
-  for (int array_offset = 0; array_offset < array_size;
-       array_offset += bias_size) {
-    for (int i = 0; i < bias_size; i++) {
-      array_data[array_offset + i] = ActivationFunctionWithMinMax(
-          array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
-    }
-  }
-#endif
-}
-
-// Single-rounding MultiplyByQuantizedMultiplier
-#if TFLITE_SINGLE_ROUNDING
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-  TFLITE_DCHECK(shift >= -31 && shift <= 30);
-
-  const int64_t total_shift = 31 - shift;
-  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
-  int64_t result = x * static_cast<int64_t>(quantized_multiplier) + round;
-  result = result >> total_shift;
-
-  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
-                result <= std::numeric_limits<int32_t>::max());
-  return static_cast<int32_t>(result);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK_LE(shift, 0);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK_GE(shift, 0);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0  (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-  TFLITE_DCHECK(shift >= -31 && shift < 8);
-  TFLITE_DCHECK(x >= -(static_cast<int64_t>(1) << 47) &&
-                x < (static_cast<int64_t>(1) << 47));
-
-  const int32_t reduced_multiplier =
-      (quantized_multiplier < 0x7FFF0000)
-          ? ((quantized_multiplier + (1 << 15)) >> 16)
-          : 0x7FFF;
-  const int64_t total_shift = 15 - shift;
-  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
-  int64_t result = x * static_cast<int64_t>(reduced_multiplier) + round;
-  result = result >> total_shift;
-
-  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
-                result <= std::numeric_limits<int32_t>::max());
-  return static_cast<int32_t>(result);
-}
-
-#ifdef USE_NEON
-inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
-    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-
-  const int right_shift = std::min(-1, shift);
-  const int left_shift = shift - right_shift;
-
-  const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
-  const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
-  const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
-
-  int32x4x4_t result;
-  result.val[0] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[1] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[2] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[3] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  return result;
-}
-#endif  // USE_NEON
-// Double-rounding MultiplyByQuantizedMultiplier
-#else
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  return RoundingDivideByPOT(
-      SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
-                                           quantized_multiplier);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  int left_shift = shift > 0 ? shift : 0;
-  int right_shift = shift > 0 ? 0 : -shift;
-  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
-                                 x * (1 << left_shift), quantized_multiplier),
-                             right_shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0  (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  assert(quantized_multiplier >= 0);
-  assert(shift >= -31 && shift < 8);
-  assert(x >= -(static_cast<int64_t>(1) << 47) &&
-         x < (static_cast<int64_t>(1) << 47));
-
-  int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000)
-                                   ? ((quantized_multiplier + (1 << 15)) >> 16)
-                                   : 0x7FFF;
-  int total_shift = 15 - shift;
-  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
-  int32_t result = x >> total_shift;
-  return result;
-}
-
-#ifdef USE_NEON
-// Round uses ARM's rounding shift right.
-inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
-    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
-  const int left_shift = std::max(shift, 0);
-  const int right_shift = std::min(shift, 0);
-  int32x4x4_t result;
-
-  int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
-  int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
-  int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
-
-  result.val[0] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[1] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[2] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[3] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  return result;
-}
-#endif  // USE_NEON
-#endif  // TFLITE_SINGLE_ROUNDING
-
-template <typename T>
-int CountLeadingZeros(T integer_input) {
-  static_assert(std::is_unsigned<T>::value,
-                "Only unsigned integer types handled.");
-#if defined(__GNUC__)
-  return integer_input ? __builtin_clz(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
-  if (integer_input == 0) {
-    return std::numeric_limits<T>::digits;
-  }
-
-  const T one_in_leading_positive = static_cast<T>(1)
-                                    << (std::numeric_limits<T>::digits - 1);
-  int leading_zeros = 0;
-  while (integer_input < one_in_leading_positive) {
-    integer_input <<= 1;
-    ++leading_zeros;
-  }
-  return leading_zeros;
-#endif
-}
-
-template <typename T>
-inline int CountLeadingSignBits(T integer_input) {
-  static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
-#if defined(__GNUC__) && !defined(__clang__)
-  return integer_input ? __builtin_clrsb(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
-  using U = typename std::make_unsigned<T>::type;
-  return integer_input >= 0
-             ? CountLeadingZeros(static_cast<U>(integer_input)) - 1
-         : integer_input != std::numeric_limits<T>::min()
-             ? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
-             : 0;
-#endif
-}
-
-// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)).
-template <typename Integer>
-inline Integer FloorLog2(Integer n) {
-  static_assert(std::is_integral<Integer>::value, "");
-  static_assert(std::is_signed<Integer>::value, "");
-  static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
-  TFLITE_CHECK_GT(n, 0);
-  if (sizeof(Integer) == 4) {
-    return 30 - CountLeadingSignBits(n);
-  } else {
-    return 62 - CountLeadingSignBits(n);
-  }
-}
-
-// The size of the LUT depends on the type of input. For int8 inputs a simple
-// 256 entries LUT is used. For int16 inputs the high 9 bits are used for
-// indexing and the 7 remaining bits are used for interpolation. We thus use a
-// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry
-// to interpolate the last value.
-template <typename LutInT>
-constexpr int lut_size() {
-  static_assert(std::is_same<LutInT, int8_t>::value ||
-                    std::is_same<LutInT, int16_t>::value,
-                "Only LUTs with int8 or int16 inputs are supported.");
-  return std::is_same<LutInT, int8_t>::value ? 256 : 513;
-}
-
-// Generate a LUT for 'func' which can be used to approximate functions like
-// exp, log, ...
-//
-// - func: the function to build the LUT for (e.g exp(x))
-// - input_min, input_max: range of the func inputs
-// - output_min, output_max: range of the func outputs
-// - lut: pointer to the LUT table to fill, the table must be of size
-// lut_size<LutInT>()
-template <typename FloatT, typename LutInT, typename LutOutT>
-inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max,
-                    FloatT output_min, FloatT output_max, LutOutT* lut) {
-  static_assert(std::is_same<LutInT, int8_t>::value ||
-                    std::is_same<LutInT, int16_t>::value,
-                "Only LUTs with int8 or int16 inputs are supported.");
-  static_assert(std::is_same<LutOutT, int8_t>::value ||
-                    std::is_same<LutOutT, int16_t>::value,
-                "Only LUTs with int8 or int16 outputs are supported.");
-  static_assert(std::is_floating_point<FloatT>::value,
-                "FloatT must be a floating-point type.");
-
-  const int nb_steps = std::is_same<LutInT, int8_t>::value ? 256 : 512;
-  const FloatT step = (input_max - input_min) / nb_steps;
-  const FloatT half_step = step / 2;
-  const FloatT output_scaling_inv =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::max() -
-                          std::numeric_limits<LutOutT>::min() + 1) /
-      (output_max - output_min);
-  const FloatT table_min =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::min());
-  const FloatT table_max =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::max());
-
-  for (int i = 0; i < nb_steps; i++) {
-    const FloatT val = func(input_min + i * step);
-    const FloatT val_midpoint = func(input_min + i * step + half_step);
-    const FloatT val_next = func(input_min + (i + 1) * step);
-
-    const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
-    const FloatT midpoint_interp_val =
-        TfLiteRound((val_next * output_scaling_inv +
-                     TfLiteRound(val * output_scaling_inv)) /
-                    2);
-    const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv);
-    const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
-    const FloatT bias = TfLiteRound(midpoint_err / 2);
-
-    lut[i] = static_cast<LutOutT>(std::min<FloatT>(
-        std::max<FloatT>(sample_val - bias, table_min), table_max));
-  }
-
-  const bool with_extra_interpolation_value =
-      std::is_same<LutInT, int16_t>::value;
-  if (with_extra_interpolation_value) {
-    lut[nb_steps] = static_cast<LutOutT>(std::min<FloatT>(
-        std::max<FloatT>(TfLiteRound(func(input_max) * output_scaling_inv),
-                         table_min),
-        table_max));
-  }
-}
-
-// LUT must have 513 values
-template <typename LutOutT>
-inline LutOutT lut_lookup_with_interpolation(int16_t value,
-                                             const LutOutT* lut) {
-  static_assert(std::is_same<LutOutT, int8_t>::value ||
-                    std::is_same<LutOutT, int16_t>::value,
-                "Only LUTs with int8 or int16 outputs are supported.");
-  // 512 base values, lut[513] is only used to calculate the slope
-  const uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
-  assert(index < 512 && "LUT index out of range.");
-  const int16_t offset = value & 0x7f;
-
-  // Base and slope are Q0.x
-  const LutOutT base = lut[index];
-  const LutOutT slope = lut[index + 1] - lut[index];
-
-  // Q0.x * Q0.7 = Q0.(x + 7)
-  // Round and convert from Q0.(x + 7) to Q0.x
-  const int delta = (slope * offset + 64) >> 7;
-
-  // Q0.15 + Q0.15
-  return static_cast<LutOutT>(base + delta);
-}
-
-// int16_t -> int16_t table lookup with interpolation
-// LUT must have 513 values
-inline int16_t lut_lookup(int16_t value, const int16_t* lut) {
-  return lut_lookup_with_interpolation(value, lut);
-}
-
-// int16_t -> int8_t table lookup with interpolation
-// LUT must have 513 values
-inline int8_t lut_lookup(int16_t value, const int8_t* lut) {
-  return lut_lookup_with_interpolation(value, lut);
-}
-
-// int8_t -> int8_t table lookup without interpolation
-// LUT must have 256 values
-inline int8_t lut_lookup(int8_t value, const int8_t* lut) {
-  return lut[128 + value];
-}
-
-// int8_t -> int16_t table lookup without interpolation
-// LUT must have 256 values
-inline int16_t lut_lookup(int8_t value, const int16_t* lut) {
-  return lut[128 + value];
-}
-
-// Table of sigmoid(i/24) at 0.16 format - 256 elements.
-
-// We use combined sigmoid and tanh look-up table, since
-// tanh(x) = 2*sigmoid(2*x) -1.
-// Both functions are symmetric, so the LUT table is only needed
-// for the absolute value of the input.
-static const uint16_t sigmoid_table_uint16[256] = {
-    32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
-    40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
-    46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
-    52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
-    56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
-    59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
-    61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
-    62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
-    63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
-    64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
-    64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
-    65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
-    65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
-    65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
-    65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
-    65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
-    65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
-    65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
-    65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
-    65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
-    65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
-    65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
-    65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
-    65534, 65534, 65535};
-
-// TODO(b/77858996): Add these to gemmlowp.
-template <typename IntegerType>
-IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t sum = a64 + b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          sum)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingAddNonGemmlowp(a.raw(), b.raw()));
-}
-
-template <typename IntegerType>
-IntegerType SaturatingSub(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
-  std::int32_t a32 = a;
-  std::int32_t b32 = b;
-  std::int32_t diff = a32 - b32;
-  return static_cast<std::int16_t>(
-      std::min(static_cast<int32_t>(32767),
-               std::max(static_cast<int32_t>(-32768), diff)));
-}
-
-template <>
-inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t diff = a64 - b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          diff)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingSub(a.raw(), b.raw()));
-}
-// End section to be moved to gemmlowp.
-
-template <typename IntegerType>
-IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
-  if (exponent == 0) {
-    return x;
-  }
-  using ScalarIntegerType =
-      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
-  const IntegerType min =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
-  const IntegerType max =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
-  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
-
-  const std::int32_t threshold =
-      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
-  const IntegerType positive_mask =
-      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
-  const IntegerType negative_mask =
-      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
-
-  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
-  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
-  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
-  return result;
-}
-
-// If we want to leave IntegerBits fixed, then multiplication
-// by a power of two has to be saturating/rounding, not exact anymore.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits>
-SaturatingRoundingMultiplyByPOTParam(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
-}
-
-// Convert int32_t multiplier to int16_t with rounding.
-inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
-                                            int16_t* multiplier_int16_t) {
-  TFLITE_DCHECK_GE(multiplier_int32_t, 0);
-  static constexpr int32_t kRoundingOffset = 1 << 15;
-  if (multiplier_int32_t >=
-      std::numeric_limits<int32_t>::max() - kRoundingOffset) {
-    *multiplier_int16_t = std::numeric_limits<int16_t>::max();
-    return;
-  }
-  const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
-  TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
-  TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
-  *multiplier_int16_t = result;
-  TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
-}
-
-// Minimum output bits to accommodate log of maximum input range.  It actually
-// does not matter if one considers, say, [-64,64] or [-64,64).
-//
-// For example, run this through Octave:
-// [0:127; ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
-constexpr int min_log_x_output_bits(int input_bits) {
-  return input_bits > 90   ? 7
-         : input_bits > 44 ? 6
-         : input_bits > 21 ? 5
-         : input_bits > 10 ? 4
-         : input_bits > 4  ? 3
-         : input_bits > 1  ? 2
-                           : 1;
-}
-
-// Although currently the name of this function says that it cannot handle
-// values less than 1, in practice it can handle as low as 1/x_max, where
-// x_max is the largest representable input.  In other words, the output range
-// is symmetric.
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1_impl(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
-  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
-  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
-  // The reason for accumulating the result with an extra bit of headroom is
-  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
-  // recip_denom will otherwise introduce an error.
-  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
-
-  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1488522236, std::log(2.0));
-  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
-  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1518500250, std::sqrt(0.5));
-  const FixedPoint0 one_quarter =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
-
-  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1057819769,
-      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
-
-  const FixedPointAccum shifted_quarter =
-      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
-
-  // Reinterpret the input value as Q0.31, because we will figure out the
-  // required shift "ourselves" instead of using, say, Rescale.
-  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
-  // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
-  FixedPoint0 r_a_tmp =
-      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
-  const int32_t r_a_raw =
-      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
-  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
-  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
-  //                   InputIntegerBits - z_b_headroom - 0.25);
-  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          static_cast<int32_t>(InputIntegerBits - z_a_headroom_plus_1),
-          31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
-  FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
-  const int32_t r_b_raw =
-      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
-  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          static_cast<int32_t>(InputIntegerBits - z_b_headroom),
-          31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
-  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
-      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
-
-  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
-  FixedPoint0 q = r - sqrt_sqrt_half;
-  q = q + q;
-
-  const FixedPoint0 common_sq = q * q;
-  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
-  const FixedPoint0 denom_minus_one_0 =
-      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
-  const FixedPoint0 recip_denom =
-      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
-
-  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
-  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
-                                              num_scaled * recip_denom);
-}
-
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  static_assert(
-      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
-      "Output integer bits must be sufficient to accommodate logs of inputs.");
-  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
-                                                     InputIntegerBits>(
-      input_val);
-}
-
-inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
-                             int* num_bits_over_unit) {
-  int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
-  // This is the number of bits to the left of the binary point above 1.0.
-  // Consider x=1.25.  In that case shifted_scale=0.8 and
-  // no later adjustment will be needed.
-  *num_bits_over_unit = x_integer_digits - headroom_plus_one;
-  const int32_t shifted_sum_minus_one =
-      static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
-                           (static_cast<uint32_t>(1) << 31));
-
-  gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
-      gemmlowp::one_over_one_plus_x_for_x_in_0_1(
-          gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
-  return shifted_scale.raw();
-}
-
-inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
-                                             int32_t* output_inv_sqrt,
-                                             int* output_shift) {
-  TFLITE_DCHECK_GE(input, 0);
-  if (input <= 1) {
-    // Handle the input value 1 separately to avoid overflow in that case
-    // in the general computation below (b/143972021). Also handle 0 as if it
-    // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
-    // but rare/unrealistic input value. We can expect both to occur in some
-    // incompletely trained models, but probably not in fully trained models.
-    *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
-    *output_shift = 0;
-    return;
-  }
-  TFLITE_DCHECK_GT(input, 1);
-  *output_shift = 11;
-  while (input >= (1 << 29)) {
-    input /= 4;
-    ++*output_shift;
-  }
-  const unsigned max_left_shift_bits =
-      CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
-  const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
-  const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
-  *output_shift -= left_shift_bit_pairs;
-  input <<= 2 * left_shift_bit_pairs;
-  TFLITE_DCHECK_GE(input, (1 << 27));
-  TFLITE_DCHECK_LT(input, (1 << 29));
-  using gemmlowp::FixedPoint;
-  using gemmlowp::Rescale;
-  using gemmlowp::SaturatingRoundingMultiplyByPOT;
-  // Using 3 integer bits gives us enough room for the internal arithmetic in
-  // this Newton-Raphson iteration.
-  using F3 = FixedPoint<int32_t, 3>;
-  using F0 = FixedPoint<int32_t, 0>;
-  const F3 fixedpoint_input = F3::FromRaw(input >> 1);
-  const F3 fixedpoint_half_input =
-      SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
-  const F3 fixedpoint_half_three =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
-  // Newton-Raphson iteration
-  // Naive unoptimized starting guess: x = 1
-  F3 x = F3::One();
-  // Naive unoptimized number of iterations: 5
-  for (int i = 0; i < 5; i++) {
-    const F3 x3 = Rescale<3>(x * x * x);
-    x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
-  }
-  const F0 fixedpoint_half_sqrt_2 =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
-  x = x * fixedpoint_half_sqrt_2;
-  *output_inv_sqrt = x.raw();
-  if (*output_shift < 0) {
-    *output_inv_sqrt <<= -*output_shift;
-    *output_shift = 0;
-  }
-  // Convert right shift (right is positive) to left shift.
-  *output_shift *= reverse_shift;
-}
-
-// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
-// rectangular array of numbers.
-//
-// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
-// However, as Dims<N> is to be deprecated, this class exists as an adaptor
-// to enable simple unoptimized implementations of element-wise broadcasting
-// operations.
-template <int N>
-struct NdArrayDesc {
-  // The "extent" of each dimension. Indices along dimension d must be in the
-  // half-open interval [0, extents[d]).
-  int extents[N];
-
-  // The number of *elements* (not bytes) between consecutive indices of each
-  // dimension.
-  int strides[N];
-};
-
-// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
-inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
-                            int i3) {
-  TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
-  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
-         i3 * desc.strides[3];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] +
-         indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7];
-}
-
-// Given the dimensions of the operands for an element-wise binary broadcast,
-// adjusts them so that they can be directly iterated over with simple loops.
-// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
-// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
-//
-// This function assumes that the two input shapes are compatible up to
-// broadcasting and the shorter one has already been prepended with 1s to be the
-// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
-// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
-// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
-// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
-//
-// When two shapes are compatible up to broadcasting, for each dimension d,
-// the input extents are either equal, or one of them is 1.
-//
-// This function performs the following for each dimension d:
-// - If the extents are equal, then do nothing since the loop that walks over
-//   both of the input arrays is correct.
-// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
-//   and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
-//   array0 to be referenced *at any index* in dimension d and still access the
-//   same slice.
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
-                                                const Dims<N>& input1_dims,
-                                                NdArrayDesc<N>* desc0_out,
-                                                NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  // Copy dims to desc.
-  for (int i = 0; i < N; ++i) {
-    desc0_out->extents[i] = input0_dims.sizes[i];
-    desc0_out->strides[i] = input0_dims.strides[i];
-    desc1_out->extents[i] = input1_dims.sizes[i];
-    desc1_out->strides[i] = input1_dims.strides[i];
-  }
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = ArraySize(input0_dims, i);
-    const int extent1 = ArraySize(input1_dims, i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-// Copies dims to desc, calculating strides.
-template <int N>
-inline void CopyDimsToDesc(const RuntimeShape& input_shape,
-                           NdArrayDesc<N>* desc_out) {
-  int desc_stride = 1;
-  for (int i = N - 1; i >= 0; --i) {
-    desc_out->extents[i] = input_shape.Dims(i);
-    desc_out->strides[i] = desc_stride;
-    desc_stride *= input_shape.Dims(i);
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
-    NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-  TFLITE_DCHECK(desc2_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-  auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-  CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    const int extent2 = extended_input2_shape.Dims(i);
-
-    int extent = extent0;
-    if (extent1 != 1) extent = extent1;
-    if (extent2 != 1) extent = extent2;
-
-    TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
-    TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
-    TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
-
-    if (!(extent0 == extent1 && extent1 == extent2)) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent;
-      }
-      if (extent1 == 1) {
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent;
-      }
-      if (extent2 == 1) {
-        desc2_out->strides[i] = 0;
-        desc2_out->extents[i] = extent;
-      }
-    }
-  }
-}
-
-// Detailed implementation of NDOpsHelper; the indexes must start as a zero
-// array. This implementation is equivalent to N nested loops. E.g., if N=4, it
-// can be re-written as:
-// for (int b = 0; b < output.extents[0]; ++b) {
-//   for (int y = 0; y < output.extents[1]; ++y) {
-//     for (int x = 0; x < output.extents[2]; ++x) {
-//       for (int c = 0; c < output.extents[3]; ++c) {
-//           calc({b,y,x,c});
-//       }
-//     }
-//   }
-// }
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
-  }
-}
-
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    calc(indexes);
-  }
-}
-
-// Execute the calc function in the innermost iteration based on the shape of
-// the output. The calc function should take a single argument of type int[N].
-template <int N, typename Calc>
-inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
-  int indexes[N] = {0};
-  NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
-}
-// Copied from gemmlowp::RoundDown when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded down to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundDown(Integer i) {
-  return i - (i % Modulus);
-}
-
-// Copied from gemmlowp::RoundUp when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded up to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundUp(Integer i) {
-  return RoundDown<Modulus>(i + Modulus - 1);
-}
-
-// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
-template <typename Integer>
-Integer CeilQuotient(Integer a, Integer b) {
-  return (a + b - 1) / b;
-}
-
-// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
-// the direct dependency of internal/optimized/ on gemmlowp.
-//
-// It computes a reasonable number of threads to use for a GEMM of shape
-// (rows, cols, depth).
-//
-// TODO(b/131910176): get rid of this function by switching each call site
-// to its own more sensible logic for its own workload.
-template <int KernelRows>
-inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
-                                int depth) {
-  // Early-exit in the default case where multi-threading is disabled.
-  if (max_num_threads == 1) {
-    return 1;
-  }
-
-  // Ensure that each thread has KernelRows rows to process, if at all possible.
-  int thread_count = std::min(max_num_threads, rows / KernelRows);
-
-  // Limit the number of threads according to the overall size of the problem.
-  if (thread_count > 1) {
-    // Empirically determined value.
-    static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
-
-    // We can only multiply two out of three sizes without risking overflow
-    const std::uint64_t cubic_size =
-        std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
-
-    thread_count = std::min(
-        thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
-  }
-
-  if (thread_count < 1) {
-    thread_count = 1;
-  }
-
-  assert(thread_count > 0 && thread_count <= max_num_threads);
-  return thread_count;
-}
-
-template <typename T>
-void optimized_ops_preload_l1_stream(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_preload_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
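For reference on the descriptor logic removed above: broadcasting is implemented by giving a size-1 dimension the other operand's extent and a stride of 0, so the same element is re-read while the output index advances. A minimal standalone sketch of that idea (illustrative names only, not the tflite API):

```cpp
// Standalone sketch of stride-0 broadcasting, mirroring the deleted
// NdArrayDescsForElementwiseBroadcast idea; all names here are illustrative.
#include <cstdio>

int main() {
  // a is 2x3, b is logically 1x3 and is broadcast over a's first dimension.
  const float a[2][3] = {{1, 2, 3}, {4, 5, 6}};
  const float b[3] = {10, 20, 30};

  const int extents[2] = {2, 3};    // output extents
  const int a_strides[2] = {3, 1};  // normal row-major strides
  const int b_strides[2] = {0, 1};  // stride 0 on dim 0 => reuse b's only row

  float out[2][3];
  const float* pa = &a[0][0];
  for (int i = 0; i < extents[0]; ++i) {
    for (int j = 0; j < extents[1]; ++j) {
      out[i][j] = pa[i * a_strides[0] + j * a_strides[1]] +
                  b[i * b_strides[0] + j * b_strides[1]];
    }
  }
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) printf("%g ", out[i][j]);  // 11 22 33 / 14 25 36
    printf("\n");
  }
  return 0;
}
```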

+ 0 - 122
code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h

@@ -1,122 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
-
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/op_macros.h"
-
-#ifndef TFLITE_DCHECK
-#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_EQ
-#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_NE
-#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_GE
-#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_GT
-#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_LE
-#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_LT
-#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
-#ifndef TFLITE_CHECK
-#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_EQ
-#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_NE
-#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_GE
-#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_GT
-#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_LE
-#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_LT
-#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TF_LITE_STATIC_MEMORY
-// TODO(b/162019032): Consider removing these type-aliases.
-using int8 = std::int8_t;
-using uint8 = std::uint8_t;
-using int16 = std::int16_t;
-using uint16 = std::uint16_t;
-using int32 = std::int32_t;
-using uint32 = std::uint32_t;
-#endif  // !defined(TF_LITE_STATIC_MEMORY)
-
-// Allow for cross-compiler usage of function signatures - currently used for
-// specifying named RUY profiler regions in templated methods.
-#if defined(_MSC_VER)
-#define TFLITE_PRETTY_FUNCTION __FUNCSIG__
-#elif defined(__GNUC__)
-#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__
-#else
-#define TFLITE_PRETTY_FUNCTION __func__
-#endif
-
-// TFLITE_DEPRECATED()
-//
-// Duplicated from absl/base/macros.h to avoid pulling in that library.
-// Marks deprecated class, struct, enum, function, method, and variable
-// declarations. The macro argument is used as a custom diagnostic message (e.g.
-// suggestion of a better alternative).
-//
-// Example:
-//
-//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
-//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
-//
-// Every usage of a deprecated entity will trigger a warning when compiled with
-// clang's `-Wdeprecated-declarations` option. This option is turned off by
-// default, but the warnings will be reported by clang-tidy.
-#if defined(__clang__) && __cplusplus >= 201103L
-#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
-#endif
-
-#ifndef TFLITE_DEPRECATED
-#define TFLITE_DEPRECATED(message)
-#endif
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
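The check macros removed above all follow the same ternary pattern: on success the whole expression is `(void)0`, otherwise it falls through to an abort/assert expression, which keeps the macro usable as a single statement. A standalone sketch of that construction with a local abort helper (hypothetical names, not the tflite macros):

```cpp
// Illustrative re-creation of the "(cond) ? (void)0 : ABORT" macro pattern;
// MY_ABORT and MY_CHECK_EQ are hypothetical stand-ins, not tflite names.
#include <cstdio>
#include <cstdlib>

#define MY_ABORT \
  (fprintf(stderr, "check failed at %s:%d\n", __FILE__, __LINE__), abort())
#define MY_CHECK_EQ(x, y) (((x) == (y)) ? (void)0 : MY_ABORT)

int main() {
  int rows = 4;
  MY_CHECK_EQ(rows, 4);  // passes: the whole expression evaluates to (void)0
  printf("check passed\n");
  // MY_CHECK_EQ(rows, 5);  // would print the diagnostic and abort()
  return 0;
}
```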

+ 0 - 40
code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h

@@ -1,40 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
-    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
-#define TF_LITE_GLOBAL_STD_PREFIX
-#else
-#define TF_LITE_GLOBAL_STD_PREFIX std
-#endif
-
-#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
-  template <class T>                                  \
-  inline T tf_name(const T x) {                       \
-    return TF_LITE_GLOBAL_STD_PREFIX::std_name(x);    \
-  }
-
-DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
-DECLARE_STD_GLOBAL_SWITCH1(TfLiteExpm1, expm1);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
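The DECLARE_STD_GLOBAL_SWITCH1 macro above stamps out a one-argument wrapper whose body calls either the global or the std:: version of a cmath function, depending on the toolchain. A hand-written sketch of what the TfLiteRound expansion amounts to when the std:: prefix is selected (illustrative, not the generated code):

```cpp
// Hand-written equivalent of DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round)
// when TF_LITE_GLOBAL_STD_PREFIX is "std"; MyRound is an illustrative name.
#include <cmath>
#include <cstdio>

template <class T>
inline T MyRound(const T x) {
  return std::round(x);  // with the global prefix this would be ::round(x)
}

int main() {
  printf("%g %g\n", MyRound(2.5), MyRound(-2.5));  // 3 -3, halves round away from zero
  return 0;
}
```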

+ 0 - 35
code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h

@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
-inline float TfLiteMax(const float& x, const float& y) {
-  return std::max(x, y);
-}
-#else
-template <class T>
-inline T TfLiteMax(const T& x, const T& y) {
-  return std::fmax(x, y);
-}
-#endif
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

+ 0 - 35
code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h

@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
-inline float TfLiteMin(const float& x, const float& y) {
-  return std::min(x, y);
-}
-#else
-template <class T>
-inline T TfLiteMin(const T& x, const T& y) {
-  return std::fmin(x, y);
-}
-#endif
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_

+ 0 - 20
code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h

@@ -1,20 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
-
-// TFLM does not need to utilize any Neon optimizations.
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

+ 0 - 122
code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h

@@ -1,122 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
-
-#include <vector>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
-  return RuntimeShape(data.size(), data.data());
-}
-
-// A list of tensors in a format that can be used by kernels like split and
-// concatenation.
-template <typename T>
-class VectorOfTensors {
- public:
-  // Build with the tensors in 'tensor_list'.
-  VectorOfTensors(const TfLiteContext& context,
-                  const TfLiteIntArray& tensor_list) {
-    int num_tensors = tensor_list.size;
-
-    all_data_.reserve(num_tensors);
-    all_shape_.reserve(num_tensors);
-    all_shape_ptr_.reserve(num_tensors);
-
-    for (int i = 0; i < num_tensors; ++i) {
-      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
-      all_data_.push_back(GetTensorData<T>(t));
-      all_shape_.push_back(GetTensorShape(t));
-    }
-
-    // Taking the pointer from inside a std::vector is only OK if the vector is
-    // never modified, so we populate all_shape in the previous loop and then we
-    // are free to grab iterators here.
-    for (int i = 0; i < num_tensors; ++i) {
-      all_shape_ptr_.push_back(&all_shape_[i]);
-    }
-  }
-  // Return a pointer to the data pointers of all tensors in the list. For
-  // example:
-  //   float* const* f = v.data();
-  //   f[0][1] is the second element of the first tensor.
-  T* const* data() const { return all_data_.data(); }
-
-  // Return a pointer to the shape pointers of all tensors in the list. For
-  // example:
-  //   const RuntimeShape* const* d = v.shapes();
-  //   d[1] holds the dimensions of the second tensor in the list.
-  const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
-
- private:
-  std::vector<T*> all_data_;
-  std::vector<RuntimeShape> all_shape_;
-  std::vector<RuntimeShape*> all_shape_ptr_;
-};
-
-// A list of quantized tensors in a format that can be used by kernels like
-// split and concatenation.
-class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
- public:
-  // Build with the tensors in 'tensor_list'.
-  VectorOfQuantizedTensors(const TfLiteContext& context,
-                           const TfLiteIntArray& tensor_list)
-      : VectorOfTensors<uint8_t>(context, tensor_list) {
-    for (int i = 0; i < tensor_list.size; ++i) {
-      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
-      zero_point_.push_back(t->params.zero_point);
-      scale_.push_back(t->params.scale);
-    }
-  }
-
-  const float* scale() const { return scale_.data(); }
-  const int32_t* zero_point() const { return zero_point_.data(); }
-
- private:
-  std::vector<int32_t> zero_point_;
-  std::vector<float> scale_;
-};
-
-// Writes randomly accessed values from `input` sequentially into `output`.
-template <typename T>
-class SequentialTensorWriter {
- public:
-  SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
-    input_data_ = GetTensorData<T>(input);
-    output_ptr_ = GetTensorData<T>(output);
-  }
-  SequentialTensorWriter(const T* input_data, T* output_data)
-      : input_data_(input_data), output_ptr_(output_data) {}
-
-  void Write(int position) { *output_ptr_++ = input_data_[position]; }
-  void WriteN(int position, int len) {
-    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
-    output_ptr_ += len;
-  }
-
- private:
-  const T* input_data_;
-  T* output_ptr_;
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
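For context on the writer removed above: it gathers values from arbitrary input positions and emits them back-to-back into the output, which is the access pattern kernels such as gather and slice need. A standalone sketch of the same pattern on raw pointers (illustrative class name, no TfLiteTensor involved):

```cpp
// Standalone sketch of the SequentialTensorWriter idea from the deleted
// header: random reads from the input, sequential writes to the output.
#include <cstdio>
#include <cstring>

template <typename T>
class SequentialWriterSketch {
 public:
  SequentialWriterSketch(const T* input, T* output)
      : input_(input), output_(output) {}
  // Copy one element from input[position] to the next output slot.
  void Write(int position) { *output_++ = input_[position]; }
  // Copy a contiguous run of len elements starting at input[position].
  void WriteN(int position, int len) {
    memcpy(output_, &input_[position], sizeof(T) * len);
    output_ += len;
  }

 private:
  const T* input_;
  T* output_;
};

int main() {
  const float input[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  float output[4];
  SequentialWriterSketch<float> writer(input, output);
  writer.Write(5);      // output[0] = 5
  writer.WriteN(1, 3);  // output[1..3] = 1, 2, 3
  for (float v : output) printf("%g ", v);
  printf("\n");
  return 0;
}
```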

+ 0 - 484
code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h

@@ -1,484 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/c/common.h"
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-
-namespace tensor_utils {
-
-// Multiplies a matrix by a scalar and reduces the result on each row to a
-// scalar.
-// Parameters:
-//     - matrix: matrix of size n_row * n_col
-//     - scalar: the scalar that is multiplied to each element in the matrix
-//     - n_row:  the row count of the matrix
-//     - n_col:  the column count of the matrix
-//     - output: the 32bit output
-// Note: we do not need saturation because the int8 * int8 accumulation is safe
-// from overflow up to (2^31-1) / (2^14) = 131072, which is bigger than n_row,
-// and a non-zero initial output value is not exceptionally large.
-void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
-                                    int32_t n_row, int32_t n_col,
-                                    int32_t* output);
-
-// Add another vector for each batch in the batch vector.
-template <typename T>
-void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
-                          T* batch_vector) {
-  for (int b = 0; b < n_batch; b++) {
-    for (int i = 0; i < v_size; ++i) {
-      batch_vector[i] += vector[i];
-    }
-    batch_vector += v_size;
-  }
-}
-
-// Cwise product of two vectors.
-template <typename T>
-inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
-                                     int v_size, T* result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ = *vector1++ * *vector2++;
-  }
-}
-
-// Cwise product of a vector and a batch-vector.
-template <typename T>
-inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
-                                          const T* batch_vector, int n_batch,
-                                          T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
-    // Update the pointers.
-    result += v_size;
-    batch_vector += v_size;
-  }
-}
-
-// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
-// assumption here is that result array is initialized to valid values.
-template <typename T>
-inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
-                                               const T* __restrict__ vector2,
-                                               int v_size,
-                                               T* __restrict__ result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ += *vector1++ * *vector2++;
-  }
-}
-
-// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
-// operation, the assumption here is that result array is initialized to valid
-// values.
-template <typename T>
-inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
-                                                    const T* batch_vector,
-                                                    int n_batch, T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
-    // Update the pointers.
-    result += v_size;
-    batch_vector += v_size;
-  }
-}
-
-// Batch vector initialization with another vector.
-template <typename T>
-void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
-                             T* batch_vector) {
-  for (int b = 0; b < n_batch; b++) {
-    std::copy_n(vector, v_size, batch_vector + b * v_size);
-  }
-}
-
-// Checks if all entries of vector are zero for float.
-bool IsZeroVector(const float* vector, int v_size);
-
-// Checks if all entries of vector are zero for int8.
-bool IsZeroVector(const int8_t* vector, int v_size);
-
-// Quantizes a buffer of floating point values using a symmetric quantization
-// (i.e. linear quantization without an offset) to 8-bit signed integers.
-// It also outputs the range (min, max) of the floating point buffer, and the
-// scaling factor used to quantize the values.
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float* min_value,
-                             float* max_value, float* scaling_factor);
-
-// Quantizes a buffer of floating point values using a symmetric quantization
-// (i.e. linear quantization without an offset) to 8-bit signed integers.
-// It uses the range (min, max) provided to the function to calculate the
-// appropriate scaling factor to quantize the values.
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float min_value,
-                             float max_value, float* scaling_factor);
-
-void AsymmetricQuantizeFloats(const float* values, const int size,
-                              int8_t* quantized_values, float* scaling_factor,
-                              int32_t* offset);
-
-// Helper function to quantize floats.
-// float_data_ptr     input float vectors
-// n_batch            number of input vectors
-// n_data             size of a single input vector
-// quantized_data_ptr (out) vector with quantized data
-// scaling_factors    (out) scaling factors (one per vector)
-// zero_points        (out) zero points (one per vector)
-// do_asymmetric      controls if the quantization should be asymmetric.
-inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
-                                int n_data, int8_t* quantized_data_ptr,
-                                float* scaling_factors, int32_t* zero_points,
-                                bool do_asymmetric) {
-  for (int b = 0; b < n_batch; ++b) {
-    const int offset = b * n_data;
-    if (do_asymmetric) {
-      tensor_utils::AsymmetricQuantizeFloats(
-          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
-          &scaling_factors[b], &zero_points[b]);
-    } else {
-      float unused_min, unused_max;
-      tensor_utils::SymmetricQuantizeFloats(
-          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
-          &unused_min, &unused_max, &scaling_factors[b]);
-    }
-  }
-}
-
-// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
-// dimension composed by input vectors independent from each other). The result
-// of the multiplication is accumulated to the passed result buffer.
-// More specifically, for a matrix M of shape [n, i] and a batched-vector
-// of shape [i, batch] it will first compute the product of shape [n, batch].
-// This product will be accumulated to the result buffer.
-void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
-                                         int m_cols, const float* vector,
-                                         int n_batch, float* result);
-
-// Same as the function above, but the matrix is a sparse tensor with block
-// pattern 1x4.
-// This function assumes that m_cols is a multiple of the block size (4 in this
-// case) so that there's no incomplete block.
-void SparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result);
-
-// Same as the function above, but the matrix is stored in block compressed
-// sparse row format with block pattern 1x16 which consists of two arrays:
-//   1. A matrix array stores non-zero blocks of the matrix in row major.
-//   2. A ledger array stores nrows groups, one group per row. Each group starts
-//      with an integer representing the number of non-zero blocks for the
-//      corresponding row and follows with column indexes of the first element
-//      of each non-zero block.
-// This function assumes that
-//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
-//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result);
-
-// Same as the function above, but for values quantized using symmetric
-// quantization (e.g. by calling SymmetricQuantizeFloats).
-// The passed scaling_factors argument is a buffer of the quantization scaling
-// factors that will be used to dequantize the products into the final result
-// buffer.
-// These scaling factors are the multiplication of the matrix scaling factor
-// by the vector's scaling factor, one per batch (i.e. this allows quantizing
-// each batch in the batch-vector matrix independently).
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors,
-    const float* __restrict__ scaling_factors, int n_batch,
-    float* __restrict__ result);
-
-// Same as the function above except that vector values
-// are quantized with asymmetric quantization per-batch and the matrix
-// is quantized per row.
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors,
-    const float* __restrict__ scaling_factors, int n_batch,
-    float* __restrict__ result, const float* __restrict__ per_channel_scale,
-    const int32_t* __restrict__ input_offset);
-
-// Same as the function above, but the matrix is a sparse tensor with block
-// pattern 1x16.
-// This function assumes that m_cols is a multiple of the block size (16 in this
-// case) so that there's no incomplete block. Also, it assumes all offsets of
-// input, output and filter are zero.
-void SparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result);
-
-// Same as the function above, but the matrix is stored in block compressed
-// sparse row format with block pattern 1x16 which consists of two arrays:
-//   1. A matrix array stores non-zero blocks of the matrix in row major.
-//   2. A ledger array stores nrows groups, one group per row. Each group starts
-//      with an integer representing the number of non-zero blocks for the
-//      corresponding row followed by column index of the first element of
-//      each non-zero block.
-// This function assumes that
-//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
-//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    const int m_rows, const int m_cols, const int8_t* __restrict__ vectors,
-    const float* __restrict__ scaling_factors, int n_batch,
-    float* __restrict__ result);
-
-// Same as the above 8, 8, 8 integer matmul except that it takes a zero point
-// and is non-accumulative.
-// TODO(b/148688698): remove this function by folding zero point calculation in
-// prepare() function.
-void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
-                               const int8_t* input_to_gate_weights,
-                               int32_t input_to_gate_effective_scale_a,
-                               int32_t input_to_gate_effective_scale_b,
-                               int32_t n_batch, int32_t n_input, int32_t n_cell,
-                               int8_t* gate_output, int8_t gate_output_zp);
-
-// Same as above but has 16 bit and 8 bit input and 8 bit output.
-// Used in projection when hidden is 16bit.
-void MatrixBatchVectorMultiply(const int16_t* hidden,
-                               const int8_t* hidden_to_output_weights,
-                               int32_t proj_effective_scale_a,
-                               int32_t proj_effective_scale_b,
-                               const int32_t* gate_bias, int32_t n_batch,
-                               int32_t n_hidden, int32_t n_output,
-                               int32_t output_zp, int8_t* proj_output);
-
-// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
-// vector.
-// Parameters:
-//     - input: batch vector of size n_batch * n_input; 16 bit.
-//     - layer_norm_weights:  the quantized layer normalization weights.
-//     - bias: the bias for the layer normalization.
-//     - layer_norm_scale_a: multiplier for scale factor.
-//     - layer_norm_scale_b: shift for scale factor.
-//     - variance_limit: the guard to make sure the inverse does not overflow.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - output:  the 16 bit output
-void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
-                    const int32_t* bias, int32_t layer_norm_scale_a,
-                    int32_t layer_norm_scale_b, int32_t variance_limit,
-                    int n_batch, int n_input, int16_t* output);
-
-// Same as above but the internal calculation is done in float.
-void ApplyLayerNormFloat(const int16_t* input,
-                         const int16_t* layer_norm_weights,
-                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
-                         const int32_t* bias, int n_batch, int n_input,
-                         int16_t* output);
-
-// Apply Sigmoid to a quantized vector.
-// Parameters:
-//     - input: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - output:  the 16 bit output
-// The input is in Q3.12 format and the output is in Q0.15 format.
-void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
-                  int16_t* output);
-
-// Same as above but the internal calculation is done in float.
-void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                       int16_t* output);
-
-// Apply Tanh to a quantized vector.
-// Parameters:
-//     - integer_bits: the integer bits of the input.
-//                     Currently supports 0, 1, 2, 3, 4, 5, 6.
-//     - input: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - output:  the 16 bit output
-// The input is in Qm.15-m format and the output is in Q0.15 format.
-void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
-               int32_t n_input, int16_t* output);
-
-// Apply Tanh to a quantized vector. The internal calculation is in float.
-//    - Input has 2^(integer_bits) as scale.
-//    - Output has Q0.15 as scale.
-void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                    int32_t integer_bits, int16_t* output);
-
-// Element-wise multiplication of two quantized vectors.
-// Parameters:
-//     - input_1: batch vector of size n_batch * n_input; 16 bit.
-//     - input_2: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - shift:   the shift needed to produce the output.
-//     - output:  the 16 bit output of size n_batch * n_input.
-// Output does not need to be initialized.
-void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int shift, int16_t* output);
-
-// Element-wise multiplication of two quantized vectors.
-// Parameters:
-//     - input_1: batch vector of size n_batch * n_input; 16 bit.
-//     - input_2: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - shift:   the shift needed to produce the output.
-//     - output:  the 8 bit output of size n_batch * n_input.
-// Output does not need to be initialized.
-void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int shift, int8_t* output);
-
-// Element-wise multiplication of two quantized vectors with rescaling.
-// Parameters:
-//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
-//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
-//     - multiplier: the multiplier part of scale.
-//     - shift:      the shift part of scale.
-//     - n_batch:    the number of batches.
-//     - n_input:    the size for input and output.
-//     - output:     the 8 bit output of size n_batch * n_input.
-//     - output_zp:  the zero point of output.
-// Output does not need to be initialized.
-// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m *
-// 2^(s - 31).
-void CwiseMul(const int16_t* input_1, const int16_t* input_2,
-              int32_t multiplier, int32_t shift, int32_t n_batch,
-              int32_t n_input, int32_t output_zp, int8_t* output);
-
-// Element-wise saturating addition of two quantized vectors without rescaling.
-// Parameters:
-//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
-//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch:    the number of batches.
-//     - n_input:    the size for input and output.
-//     - output:     the 8 bit output of size n_batch * n_input.
-// Output does not need to be initialized.
-void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int16_t* output);
-
-// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
-// int8_t. Parameters:
-//     - vector:         vector of size v_size.
-//     - v_size:         the size of the vector.
-//     - clipping_value: the value used for clipping.
-void CwiseClipping(float* vector, const int v_size, const float clipping_value);
-void CwiseClipping(int16_t* vector, const int v_size,
-                   const int16_t clipping_value);
-void CwiseClipping(int8_t* vector, const int v_size,
-                   const int8_t clipping_value);
-
-// Dot product of two vectors.
-float VectorVectorDotProduct(const float* vector1, const float* vector2,
-                             int v_size);
-
-// Dot product of two batch vectors of size n_batch * v_size:
-// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
-//            x_2_1, x_2_2, ..., x_2_vsize,
-//            ...
-//            x_nbatch_1,..., x_nbatch_vsize]
-// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
-//            y_2_1, y_2_2, ..., y_2_vsize,
-//            ...
-//            y_nbatch_1,..., y_nbatch_vsize]
-// Then result will be a vector of n_batch size starting from 'result':
-// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
-//  x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
-//  ...
-//  x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
-template <typename T>
-inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
-                                             int v_size, int n_batch,
-                                             T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
-    vector1 += v_size;
-    vector2 += v_size;
-  }
-}
-
-// Same as above but input is 16bit and output is 32bit.
-void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                      const int16_t* vector2, int v_size,
-                                      int n_batch, int32_t* result);
-
-// Same as above, but inputs are 16bit integer and output is 16bit integer.
-void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
-                                             const int16_t* batch_vector,
-                                             int n_batch, int32_t multiplier,
-                                             int shift, int16_t* result);
-
-// Compute "1.0f - elements of vector" (used in CIFG).
-void Sub1Vector(const float* vector, int v_size, float* result);
-
-// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
-// "vector" has range [0, 32767] because it is the output of sigmoid function.
-void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
-
-// Multiply all elements of vector with a scalar.
-void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                          float* result);
-
-// Reduce-sum on a float input vector:
-// input_vector: float pointer to input vector.
-// output_vector: float pointer to the output vector.
-// output_size: output vector size.
-// reduction_size: number of consecutive elements from input vector which are
-// added to get one element of output.
-void ReductionSumVector(const float* input_vector, float* output_vector,
-                        int output_size, int reduction_size);
-
-// Same as above but input/output is 32 bit integer.
-void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size);
-
-// Same as above but input is 8 bit integer.
-void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size);
-
-// Layer norm for each batch.
-void MeanStddevNormalization(const float* input_vector, float* output_vector,
-                             int v_size, int n_batch);
-
-// Saturate Add with rescale on both inputs.
-void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                          const int8_t* recurrent, int8_t recurrent_zp,
-                          int32_t input_effective_scale_a,
-                          int32_t input_effective_scale_b,
-                          int32_t recurrent_effective_scale_a,
-                          int32_t recurrent_effective_scale_b, int32_t n_batch,
-                          int32_t n_cell, int16_t* output);
-
-}  // namespace tensor_utils
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
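The symmetric quantization declared above (SymmetricQuantizeFloats) maps floats onto int8 with no offset by deriving the scale from the largest absolute value in the buffer. A rough standalone sketch of that scheme, assuming the usual max-abs/127 scale (illustrative only, not the tflite implementation):

```cpp
// Standalone sketch of symmetric (offset-free) int8 quantization: the scale is
// chosen so the largest |value| maps to 127. Function name is illustrative.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

void SymmetricQuantizeSketch(const float* values, int size,
                             int8_t* quantized, float* scaling_factor) {
  float max_abs = 0.f;
  for (int i = 0; i < size; ++i) max_abs = std::max(max_abs, std::fabs(values[i]));
  *scaling_factor = (max_abs > 0.f) ? max_abs / 127.f : 1.f;
  for (int i = 0; i < size; ++i) {
    const float scaled = std::round(values[i] / *scaling_factor);
    quantized[i] = static_cast<int8_t>(std::min(127.f, std::max(-127.f, scaled)));
  }
}

int main() {
  const float v[4] = {-0.5f, 0.25f, 1.0f, -1.0f};
  int8_t q[4];
  float scale;
  SymmetricQuantizeSketch(v, 4, q, &scale);
  printf("scale=%g quantized=%d %d %d %d\n", scale, q[0], q[1], q[2], q[3]);
  return 0;
}
```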

+ 0 - 416
code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc

@@ -1,416 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-
-#include <algorithm>
-#include <cmath>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-
-namespace tflite {
-
-namespace {
-// These constants are used to manipulate the binary representation of doubles.
-// Double-precision binary64 floating point format is:
-// Bit |  63  |  62-52   |   51-0   |
-//     | Sign | Exponent | Fraction |
-// To avoid 64-bit integers as much as possible, I break this into high and
-// low 32-bit chunks. High is:
-// Bit |  31  |  30-20   |      19-0     |
-//     | Sign | Exponent | High Fraction |
-// Low is:
-// Bit |     31-0     |
-//     | Low Fraction |
-// We then access the components through logical bit-wise operations to
-// extract the parts needed, with the positions and masks derived from the
-// layout shown above.
-constexpr uint64_t kSignMask = 0x8000000000000000LL;
-constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
-constexpr int32_t kExponentShift = 52;
-constexpr int32_t kExponentBias = 1023;
-constexpr uint32_t kExponentIsBadNum = 0x7ff;
-constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
-constexpr uint32_t kFractionShift = 22;
-constexpr uint32_t kFractionRoundingMask = 0x003fffff;
-constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
-}  // namespace
-
-void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
-                        int* shift) {
-#if TFLITE_SINGLE_ROUNDING
-  // Single-rounding MultiplyByQuantizedMultiplier only supports positive
-  // multipliers.
-  // TFLITE_DCHECK(double_multiplier >= 0);
-#endif
-  if (double_multiplier == 0.) {
-    *quantized_multiplier = 0;
-    *shift = 0;
-    return;
-  }
-#ifdef TFLITE_EMULATE_FLOAT
-  // If we're trying to avoid the use of floating-point instructions (for
-  // example on microcontrollers) then use an alternative implementation
-  // that only requires integer and bitwise operations. To enable this, you
-  // need to set the define during the build process for your platform.
-  int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
-#else   // TFLITE_EMULATE_FLOAT
-  const double q = std::frexp(double_multiplier, shift);
-  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1LL << 31)));
-#endif  // TFLITE_EMULATE_FLOAT
-  TFLITE_CHECK(q_fixed <= (1LL << 31));
-  if (q_fixed == (1LL << 31)) {
-    q_fixed /= 2;
-    ++*shift;
-  }
-  TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
-  // A shift amount smaller than -31 would cause all bits to be shifted out
-  // and thus all results would be zero. We implement that instead with
-  // q_fixed==0, so as to avoid hitting issues with right-shift
-  // operations with shift amounts greater than 31. Note that this happens
-  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
-  // that we're effectively flushing tiny double_multiplier's to zero.
-  // We could conceivably handle values in the range (roughly) [32, 63]
-  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
-  // the present handling is just doing 'flush denormals to zero'. We could
-  // reconsider and actually generate nonzero denormals if a need arises.
-  if (*shift < -31) {
-    *shift = 0;
-    q_fixed = 0;
-  }
-#if TFLITE_SINGLE_ROUNDING
-  // Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30,
-  // saturate it.
-  if (*shift > 30) {
-    *shift = 30;
-    q_fixed = (1LL << 31) - 1;
-  }
-#endif
-  *quantized_multiplier = static_cast<int32_t>(q_fixed);
-}
-
-void QuantizeMultiplierGreaterThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* left_shift) {
-  TFLITE_CHECK_GT(double_multiplier, 1.);
-  QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
-  TFLITE_CHECK_GE(*left_shift, 0);
-}
-
-void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
-                                         int32_t* quantized_multiplier,
-                                         int* left_shift) {
-  TFLITE_CHECK_LT(double_multiplier, 1.);
-  TFLITE_CHECK_GT(double_multiplier, 0.);
-  int shift;
-  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
-  TFLITE_CHECK_LE(shift, 0);
-  *left_shift = shift;
-}
-
-int64_t IntegerFrExp(double input, int* shift) {
-  // Make sure our assumptions about the double layout hold.
-  TFLITE_CHECK_EQ(8, sizeof(double));
-
-  // We want to access the bits of the input double value directly, which is
-  // tricky to do safely, so use a union to handle the casting.
-  union {
-    double double_value;
-    uint64_t double_as_uint;
-  } cast_union;
-  cast_union.double_value = input;
-  const uint64_t u = cast_union.double_as_uint;
-
-  // If the bitfield is all zeros apart from the sign bit, this is a normalized
-  // zero value, so return standard values for this special case.
-  if ((u & ~kSignMask) == 0) {
-    *shift = 0;
-    return 0;
-  }
-
-  // Deal with NaNs and Infs, which are always indicated with a fixed pattern in
-  // the exponent, and distinguished by whether the fractions are zero or
-  // non-zero.
-  const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
-  if (exponent_part == kExponentIsBadNum) {
-    *shift = std::numeric_limits<int>::max();
-    if (u & kFractionMask) {
-      // NaN, so just return zero (with the exponent set to INT_MAX).
-      return 0;
-    } else {
-      // Infinity, so return +/- INT_MAX.
-      if (u & kSignMask) {
-        return std::numeric_limits<int64_t>::min();
-      } else {
-        return std::numeric_limits<int64_t>::max();
-      }
-    }
-  }
-
-  // The shift is fairly easy to extract from the high bits of the double value,
-  // just by masking it out and applying a bias. The std::frexp() implementation
-  // always returns values between 0.5 and 1.0 though, whereas the exponent
-  // assumes 1.0 to 2.0 is the standard range, so I add on one to match that
-  // interface.
-  *shift = (exponent_part - kExponentBias) + 1;
-
-  // There's an implicit high bit in the double format definition, so make sure
-  // we include that at the top, and then reconstruct the rest of the fractional
-  // value from the remaining fragments.
-  int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);
-
-  // We're cutting off some bits at the bottom, so to exactly match the standard
-  // frexp implementation here we'll apply rounding by adding one to the least
-  // significant bit of the result if the discarded portion is over half of the
-  // maximum.
-  if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
-    fraction += 1;
-  }
-  // Negate the fraction if the sign bit was set.
-  if (u & kSignMask) {
-    fraction *= -1;
-  }
-
-  return fraction;
-}
-
-double DoubleFromFractionAndShift(int64_t fraction, int shift) {
-  union {
-    double double_value;
-    uint64_t double_as_uint;
-  } result;
-
-  // Detect NaNs and infinities.
-  if (shift == std::numeric_limits<int>::max()) {
-    if (fraction == 0) {
-      return std::numeric_limits<double>::quiet_NaN();
-    } else if (fraction > 0) {
-      return std::numeric_limits<double>::infinity();
-    } else {
-      return -std::numeric_limits<double>::infinity();
-    }
-  }
-
-  // Return a normalized zero for a zero fraction.
-  if (fraction == 0) {
-    result.double_as_uint = 0;
-    return result.double_value;
-  }
-
-  bool is_negative = (fraction < 0);
-  int64_t encoded_fraction = is_negative ? -fraction : fraction;
-  int64_t encoded_shift = (shift - 1);
-  while (encoded_fraction < 0x40000000) {
-    encoded_fraction *= 2;
-    encoded_shift -= 1;
-  }
-  while (encoded_fraction > 0x80000000) {
-    encoded_fraction /= 2;
-    encoded_shift += 1;
-  }
-  encoded_fraction -= 0x40000000;
-  if (encoded_shift < -1022) {
-    encoded_shift = -1023;
-  } else if (encoded_shift > 1022) {
-    encoded_shift = 1023;
-  }
-  encoded_shift += kExponentBias;
-  uint64_t encoded_sign = is_negative ? kSignMask : 0;
-  result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
-                          (encoded_fraction << kFractionShift);
-  return result.double_value;
-}
-
-double IntegerDoubleMultiply(double a, double b) {
-  int a_shift;
-  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
-  int b_shift;
-  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
-  // Detect NaNs and infinities.
-  if (a_shift == std::numeric_limits<int>::max() ||
-      (b_shift == std::numeric_limits<int>::max())) {
-    return std::numeric_limits<double>::quiet_NaN();
-  }
-  const int result_shift = a_shift + b_shift + 1;
-  const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
-  return DoubleFromFractionAndShift(result_fraction, result_shift);
-}
-
-int IntegerDoubleCompare(double a, double b) {
-  int a_shift;
-  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
-  int b_shift;
-  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
-
-  // Detect NaNs and infinities.
-  if (a_shift == std::numeric_limits<int>::max() ||
-      (b_shift == std::numeric_limits<int>::max())) {
-    return 1;
-  }
-
-  if ((a_fraction == 0) && (b_fraction < 0)) {
-    return 1;
-  } else if ((a_fraction < 0) && (b_fraction == 0)) {
-    return -1;
-  } else if (a_shift < b_shift) {
-    return -1;
-  } else if (a_shift > b_shift) {
-    return 1;
-  } else if (a_fraction < b_fraction) {
-    return -1;
-  } else if (a_fraction > b_fraction) {
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
-void PreprocessSoftmaxScaling(double beta, double input_scale,
-                              int input_integer_bits,
-                              int32_t* quantized_multiplier, int* left_shift) {
-  // If the overall multiplier (input and beta) is large, then exp() of an
-  // input difference of 1 scaled by this will be large.  In other words, we
-  // can cap the multiplier and know that, when it is used, the output will be
-  // (round to) zero wherever the input is not at the maximum value.
-
-  // If the overall scale is less than one, and input_integer_bits=0, then the
-  // result is double equivalent of Q0.31 (actually with more precision). Thus
-  // this generates a Q(input_integer_bits).(31-input_integer_bits)
-  // representation.
-#if TFLITE_SINGLE_ROUNDING
-  const double max_real_multiplier = (1LL << 30) - 1.0;
-#else
-  const double max_real_multiplier = (1LL << 31) - 1.0;
-#endif
-
-#ifdef TFLITE_EMULATE_FLOAT
-  const double input_beta = IntegerDoubleMultiply(beta, input_scale);
-  int shift;
-  int64_t fraction = IntegerFrExp(input_beta, &shift);
-  shift += (31 - input_integer_bits);
-  double input_beta_real_multiplier =
-      DoubleFromFractionAndShift(fraction, shift);
-  if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) >
-      0) {
-    input_beta_real_multiplier = max_real_multiplier;
-  }
-#else   // TFLITE_EMULATE_FLOAT
-  const double input_beta_real_multiplier =
-      std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)),
-                       max_real_multiplier);
-#endif  // TFLITE_EMULATE_FLOAT
-
-  QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
-                                   quantized_multiplier, left_shift);
-}
-
-void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
-                                    int input_integer_bits,
-                                    int32_t* quantized_multiplier,
-                                    int* left_shift,
-                                    int32_t* reverse_scaling_divisor,
-                                    int* reverse_scaling_left_shift) {
-  PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
-                           quantized_multiplier, left_shift);
-
-  // Also calculate what amounts to the inverse scaling factor for the input.
-  const double real_reverse_scaling_divisor =
-      (1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
-  tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
-                                              reverse_scaling_divisor,
-                                              reverse_scaling_left_shift);
-}
-
-int CalculateInputRadius(int input_integer_bits, int input_left_shift,
-                         int total_signed_bits) {
-#ifdef TFLITE_EMULATE_FLOAT
-  int64_t result = (1 << input_integer_bits) - 1;
-  result <<= (total_signed_bits - input_integer_bits);
-  result >>= input_left_shift;
-  return result;
-#else   // TFLITE_EMULATE_FLOAT
-  const double max_input_rescaled =
-      1.0 * ((1 << input_integer_bits) - 1) *
-      (1LL << (total_signed_bits - input_integer_bits)) /
-      (1LL << input_left_shift);
-  // Tighten bound using floor.  Suppose that we could use the exact value.
-  // After scaling the difference, the result would be at the maximum.  Thus we
-  // must ensure that our value has lower magnitude.
-  return static_cast<int>(std::floor(max_input_rescaled));
-#endif  // TFLITE_EMULATE_FLOAT
-}
-
-void NudgeQuantizationRange(const float min, const float max,
-                            const int quant_min, const int quant_max,
-                            float* nudged_min, float* nudged_max,
-                            float* nudged_scale) {
-  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
-  const float quant_min_float = static_cast<float>(quant_min);
-  const float quant_max_float = static_cast<float>(quant_max);
-  *nudged_scale = (max - min) / (quant_max_float - quant_min_float);
-  const float zero_point_from_min = quant_min_float - min / *nudged_scale;
-  uint16_t nudged_zero_point;
-  if (zero_point_from_min < quant_min_float) {
-    nudged_zero_point = static_cast<uint16_t>(quant_min);
-  } else if (zero_point_from_min > quant_max_float) {
-    nudged_zero_point = static_cast<uint16_t>(quant_max);
-  } else {
-    nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
-  }
-  *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
-  *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
-}
-
-void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
-                       const float nudged_max, const float* input_data,
-                       float* output_data, const float size) {
-  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
-  const float inv_nudged_scale = 1.0f / nudged_scale;
-
-  for (int i = 0; i < size; i++) {
-    const float src_val = input_data[i];
-    const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
-    const float clamped_shifted = clamped - nudged_min;
-    const float dst_val =
-        TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
-        nudged_min;
-    output_data[i] = dst_val;
-  }
-}
-
-bool CheckedLog2(const float x, int* log2_result) {
-  // Using TfLiteRound instead of std::round and std::log instead of
-  // std::log2 to work around these functions being missing in a toolchain
-  // used in some TensorFlow tests as of May 2018.
-  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
-  const float x_log2_rounded = TfLiteRound(x_log2);
-  const float x_log2_fracpart = x_log2 - x_log2_rounded;
-
-  *log2_result = static_cast<int>(x_log2_rounded);
-  return std::abs(x_log2_fracpart) < 1e-3f;
-}
-
-void QuantizeMultiplierArray(const double* effective_scales, size_t size,
-                             int32_t* effective_scale_significand,
-                             int* effective_shift) {
-  for (size_t i = 0; i < size; ++i) {
-    QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
-                       &effective_shift[i]);
-  }
-}
-
-}  // namespace tflite
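
For reference, a minimal sketch of how the two fake-quantization helpers above fit together. This is illustrative only (it is not part of the commit) and assumes the removed quantization_util.h is still reachable on an include path.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  // Nudge a [-1, 1] float range onto the 8-bit grid [0, 255].
  float nudged_min, nudged_max, nudged_scale;
  tflite::NudgeQuantizationRange(-1.0f, 1.0f, 0, 255,
                                 &nudged_min, &nudged_max, &nudged_scale);
  // Round-trip a few values through the nudged grid; 2.0f falls outside the
  // range and is clamped to nudged_max before quantization.
  const float input[4] = {-0.5f, 0.0f, 0.25f, 2.0f};
  float output[4];
  tflite::FakeQuantizeArray(nudged_scale, nudged_min, nudged_max,
                            input, output, 4);
  for (float v : output) std::printf("%f\n", v);
}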

+ 0 - 292
code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h

@@ -1,292 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
-
-#include <cmath>
-#include <cstdint>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// Given the min and max values of a float array, return
-// reasonable quantization parameters to use for this array.
-template <typename T>
-QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
-                                            bool narrow_range) {
-  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
-  const T qmax = std::numeric_limits<T>::max();
-  const double qmin_double = qmin;
-  const double qmax_double = qmax;
-  // 0 should always be a representable value. Let's assume that the initial
-  // min,max range contains 0.
-  TFLITE_CHECK_LE(rmin, 0.);
-  TFLITE_CHECK_GE(rmax, 0.);
-  if (rmin == rmax) {
-    // Special case where the min,max range is a point. Should be {0}.
-    TFLITE_CHECK_EQ(rmin, 0.);
-    TFLITE_CHECK_EQ(rmax, 0.);
-    QuantizationParams quantization_params;
-    quantization_params.zero_point = 0;
-    quantization_params.scale = 0.;
-    return quantization_params;
-  }
-
-  // General case.
-  //
-  // First determine the scale.
-  const double scale = (rmax - rmin) / (qmax_double - qmin_double);
-
-  // Zero-point computation.
-  // First the initial floating-point computation. The zero-point can be
-  // determined from solving an affine equation for any known pair
-  // (real value, corresponding quantized value).
-  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
-  // The arithmetic error on the zero point computed from either pair
-  // will be roughly machine_epsilon * (sum of absolute values of terms)
-  // so we want to use the variant that adds the smaller terms.
-  const double zero_point_from_min = qmin_double - rmin / scale;
-  const double zero_point_from_max = qmax_double - rmax / scale;
-  const double zero_point_from_min_error =
-      std::abs(qmin_double) + std::abs(rmin / scale);
-  const double zero_point_from_max_error =
-      std::abs(qmax_double) + std::abs(rmax / scale);
-
-  const double zero_point_double =
-      zero_point_from_min_error < zero_point_from_max_error
-          ? zero_point_from_min
-          : zero_point_from_max;
-
-  // Now we need to nudge the zero point to be an integer
-  // (our zero points are integer, and this is motivated by the requirement
-  // to be able to represent the real value "0" exactly as a quantized value,
-  // which is required in multiple places, for example in Im2col with SAME
-  // padding).
-  T nudged_zero_point = 0;
-  if (zero_point_double < qmin_double) {
-    nudged_zero_point = qmin;
-  } else if (zero_point_double > qmax_double) {
-    nudged_zero_point = qmax;
-  } else {
-    nudged_zero_point = static_cast<T>(round(zero_point_double));
-  }
-  // The zero point should always be in the range of quantized value,
-  // [qmin, qmax].
-  TFLITE_CHECK_GE(nudged_zero_point, qmin);
-  TFLITE_CHECK_LE(nudged_zero_point, qmax);
-
-  // Finally, store the result nudged quantization params.
-  QuantizationParams quantization_params;
-  quantization_params.zero_point = nudged_zero_point;
-  quantization_params.scale = scale;
-  return quantization_params;
-}
-
-template <typename T>
-QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
-  return ChooseQuantizationParams<T>(rmin, rmax, false);
-}
-
-// Converts a floating-point number to an integer. For all inputs x where
-// static_cast<IntOut>(x) is legal according to the C++ standard, the result
-// is identical to that cast (i.e. the result is x with its fractional part
-// truncated whenever that is representable as IntOut).
-//
-// static_cast would cause undefined behavior for the following cases, which
-// have well-defined behavior for this function:
-//
-//  1. If x is NaN, the result is zero.
-//
-//  2. If the truncated form of x is above the representable range of IntOut,
-//     the result is std::numeric_limits<IntOut>::max().
-//
-//  3. If the truncated form of x is below the representable range of IntOut,
-//     the result is std::numeric_limits<IntOut>::min().
-//
-// Note that cases #2 and #3 cover infinities as well as finite numbers.
-//
-// The range of FloatIn must include the range of IntOut, otherwise
-// the results are undefined.
-// TODO(sfeuz): Replace by absl::SafeCast once available.
-template <class IntOut, class FloatIn>
-IntOut SafeCast(FloatIn x) {
-  static_assert(!std::numeric_limits<FloatIn>::is_integer,
-                "FloatIn is integer");
-  static_assert(std::numeric_limits<IntOut>::is_integer,
-                "IntOut is not integer");
-  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
-
-  // Special case NaN, for which the logic below doesn't work.
-  if (std::isnan(x)) {
-    return 0;
-  }
-
-  // Negative values all clip to zero for unsigned results.
-  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
-    return 0;
-  }
-
-  // Handle infinities.
-  if (std::isinf(x)) {
-    return x < 0 ? std::numeric_limits<IntOut>::min()
-                 : std::numeric_limits<IntOut>::max();
-  }
-
-  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
-  // unless x is zero in which case exp == 0. Note that this implies that the
-  // magnitude of x is strictly less than 2^exp.
-  int exp = 0;
-  std::frexp(x, &exp);
-
-  // Let N be the number of non-sign bits in the representation of IntOut. If
-  // the magnitude of x is strictly less than 2^N, the truncated version of x
-  // is representable as IntOut. The only representable integer for which this
-  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
-  // by the fall-through below.
-  if (exp <= std::numeric_limits<IntOut>::digits) {
-    return x;
-  }
-
-  // Handle numbers with magnitude >= 2^N.
-  return x < 0 ? std::numeric_limits<IntOut>::min()
-               : std::numeric_limits<IntOut>::max();
-}
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of NEGATIVE its exponent ---
-// this is intended as a RIGHT-shift.
-//
-// Restricted to the case where the multiplier < 1 (and non-negative).
-void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
-                                         int32_t* quantized_multiplier,
-                                         int* left_shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Restricted to the case where the multiplier > 1.
-void QuantizeMultiplierGreaterThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* left_shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Handles an arbitrary positive multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
-                        int* shift);
-
-// Splits a double input value into a returned fraction, and a shift value from
-// the exponent, using only bitwise and integer operations to support
-// microcontrollers and other environments without floating-point support.
-//
-// This is designed to be a replacement for how std::frexp() is used within the
-// QuantizeMultiplier() function, and so has a different signature than the
-// standard version, returning a 64-bit integer rather than a double. This
-// result has a maximum value of 1<<31, with the fraction expressed as a
-// proportion of that maximum.
-//
-// std::frexp() returns NaNs and infinities unmodified, but since we're
-// returning integers that can't represent those values, instead we return
-// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
-// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
-// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
-// result in return values that end up truncating some bits at the end,
-// reflecting the loss of precision inherent in denormalization.
-int64_t IntegerFrExp(double input, int* shift);
-
-// Converts an integer fraction in the format produced by IntegerFrExp (where
-// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
-// IEEE binary64 double format result. The implementation uses only integer and
-// bitwise operators, so no floating point hardware support or emulation is
-// needed. This is here so quantized operations can run non-time-critical
-// preparation calculations on microcontrollers and other platforms without
-// float support.
-double DoubleFromFractionAndShift(int64_t fraction, int shift);
-
-// Performs a multiplication of two numbers in double format, using only integer
-// and bitwise instructions. This is aimed at supporting housekeeping functions
-// for quantized operations on microcontrollers without floating-point hardware.
-double IntegerDoubleMultiply(double a, double b);
-
-// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
-// greater than b. It is implemented using only integer and logical instructions
-// so that it can be easily run on microcontrollers for quantized operations.
-int IntegerDoubleCompare(double a, double b);
-
-// This first creates a multiplier in a double equivalent of
-// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
-// precision in the double's fractional bits.  It then splits the result into
-// significand and exponent.
-void PreprocessSoftmaxScaling(double beta, double input_scale,
-                              int input_integer_bits,
-                              int32_t* quantized_multiplier, int* left_shift);
-// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
-void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
-                                    int input_integer_bits,
-                                    int32_t* quantized_multiplier,
-                                    int* left_shift,
-                                    int32_t* reverse_scaling_divisor,
-                                    int* reverse_scaling_left_shift);
-// Calculate the largest input that will result in a within-bounds intermediate
-// result within MultiplyByQuantizedMultiplierGreaterThanOne.  In other words,
-// it must not overflow before we reduce the value by multiplication by the
-// input multiplier.  The negative radius is used as the minimum difference in
-// Softmax.
-int CalculateInputRadius(int input_integer_bits, int input_left_shift,
-                         int total_signed_bits = 31);
-
-// Nudges a min/max quantization range to ensure zero is zero.
-// Gymnastics with nudged zero point is to ensure that real zero maps to
-// an integer, which is required for e.g. zero-padding in convolutional layers.
-// Outputs nudged_min, nudged_max, nudged_scale.
-void NudgeQuantizationRange(const float min, const float max,
-                            const int quant_min, const int quant_max,
-                            float* nudged_min, float* nudged_max,
-                            float* nudged_scale);
-
-// Fake quantizes (quantizes and dequantizes) input_data using the scale,
-// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
-// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
-void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
-                       const float nudged_max, const float* input_data,
-                       float* output_data, const float size);
-
-// If x is approximately a power of two (with any positive or negative
-// exponent), stores that exponent (i.e. log2(x)) in *log2_result and returns
-// true; otherwise returns false.
-bool CheckedLog2(const float x, int* log2_result);
-
-// Decomposes an array of double multipliers into a Q0.31 int32 representation
-// of its significand, and shift representation of its exponent.
-//
-// Handles an arbitrary multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void QuantizeMultiplierArray(const double* effective_scales, size_t size,
-                             int32_t* effective_scale_significand,
-                             int* effective_shift);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
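
As a worked example of the significand/shift decomposition declared above (illustrative, not part of the commit, and assuming the header is available):

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  int32_t significand = 0;
  int shift = 0;
  // 0.75 = 0.75 * 2^0, so the Q0.31 significand is round(0.75 * 2^31)
  // = 1610612736 (0x60000000) and the shift is 0.
  tflite::QuantizeMultiplier(0.75, &significand, &shift);
  std::printf("significand=%ld shift=%d\n", static_cast<long>(significand),
              shift);
}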

+ 0 - 400
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h

@@ -1,400 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
-
-#include <algorithm>
-#include <type_traits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  T activation_min, activation_max;
-  GetActivationParams(params, &activation_min, &activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] + input2_data[i], activation_min, activation_max);
-  }
-}
-
-// Element-wise add that can often be used for inner loop of broadcast add as
-// well as the non-broadcast add.
-
-// This function is used for 8-bit as well as for 16-bit, but the accumulator
-// is 32-bit for both cases. The overflow does not happen due to the
-// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
-template <typename T>
-inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
-  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
-  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
-  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
-
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-// Scalar-broadcast add that can be used for inner loop of more general
-// broadcast add, so that, for example, scalar-broadcast with batch will still
-// be fast.
-inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
-                               uint8_t input1_data, const uint8_t* input2_data,
-                               uint8_t* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-
-  const int32_t input1_val = params.input1_offset + input1_data;
-  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-  const int32_t scaled_input1_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input1_val, params.input1_multiplier, params.input1_shift);
-  for (int i = 0; i < size; ++i) {
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<uint8_t>(clamped_output);
-  }
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void AddGeneralParamScale(const ArithmeticParams& params,
-                                 const RuntimeShape& input1_shape,
-                                 const int16_t* input1_data,
-                                 const RuntimeShape& input2_shape,
-                                 const int16_t* input2_data,
-                                 const RuntimeShape& output_shape,
-                                 int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  int max_value = std::numeric_limits<int16_t>::max();
-
-  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
-  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
-  TFLITE_DCHECK_LT(params.input1_offset, max_value);
-  TFLITE_DCHECK_LT(params.input2_offset, max_value);
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int16_t* input1_data,
-                const RuntimeShape& input2_shape, const int16_t* input2_data,
-                const RuntimeShape& output_shape, int16_t* output_data,
-                bool pot_scale = true) {
-  if (!pot_scale) {
-    AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
-                         input2_data, output_shape, output_data);
-    return;
-  }
-
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-
-  const int input1_shift = params.input1_shift;
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  const int16_t output_activation_min = params.quantized_activation_min;
-  const int16_t output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
-  TFLITE_DCHECK_LE(input1_shift, 0);
-  TFLITE_DCHECK_LE(params.input2_shift, 0);
-  const int16_t* not_shift_input =
-      input1_shift == 0 ? input1_data : input2_data;
-  const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
-  const int input_right_shift =
-      input1_shift == 0 ? -params.input2_shift : -input1_shift;
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-
-    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
-    F0 scaled_input = F0::FromRaw(
-        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
-    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
-    const int16_t raw_output = result.raw();
-    const int16_t clamped_output = std::min(
-        output_activation_max, std::max(output_activation_min, raw_output));
-    output_data[i] = clamped_output;
-  }
-}
-
-template <typename T>
-inline typename std::enable_if<!is_small_integer<T>::value, void>::type
-BroadcastAdd4DSlow(const ArithmeticParams& params,
-                   const RuntimeShape& input1_shape, const T* input1_data,
-                   const RuntimeShape& input2_shape, const T* input2_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  T activation_min, activation_max;
-  GetActivationParams(params, &activation_min, &activation_max);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax<T>(
-                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
-                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  activation_min, activation_max);
-        }
-      }
-    }
-  }
-}
-
-// This function is used for 8-bit as well as for 16-bit, but the accumulator
-// is 32-bit for both cases. The overflow does not happen due to the
-// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
-template <typename T>
-inline typename std::enable_if<is_small_integer<T>::value, void>::type
-BroadcastAdd4DSlow(const ArithmeticParams& params,
-                   const RuntimeShape& input1_shape, const T* input1_data,
-                   const RuntimeShape& input2_shape, const T* input2_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t shifted_input1_val =
-              input1_val * (1 << params.left_shift);
-          const int32_t shifted_input2_val =
-              input2_val * (1 << params.left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, params.input1_multiplier,
-                  params.input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, params.input2_multiplier,
-                  params.input2_shift);
-          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-          const int32_t raw_output =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  raw_sum, params.output_multiplier, params.output_shift) +
-              params.output_offset;
-          const int32_t clamped_output =
-              std::min(params.quantized_activation_max,
-                       std::max(params.quantized_activation_min, raw_output));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
-                                 const RuntimeShape& unswitched_input1_shape,
-                                 const uint8_t* unswitched_input1_data,
-                                 const RuntimeShape& unswitched_input2_shape,
-                                 const uint8_t* unswitched_input2_data,
-                                 const RuntimeShape& output_shape,
-                                 uint8_t* output_data) {
-  ArithmeticParams switched_params = unswitched_params;
-  switched_params.input1_offset = unswitched_params.input2_offset;
-  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
-  switched_params.input1_shift = unswitched_params.input2_shift;
-  switched_params.input2_offset = unswitched_params.input1_offset;
-  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
-  switched_params.input2_shift = unswitched_params.input1_shift;
-
-  const bool use_unswitched =
-      unswitched_params.broadcast_category ==
-      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
-
-  const ArithmeticParams& params =
-      use_unswitched ? unswitched_params : switched_params;
-  const uint8_t* input1_data =
-      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
-  const uint8_t* input2_data =
-      use_unswitched ? unswitched_input2_data : unswitched_input1_data;
-
-  // Fivefold nested loops. The second input resets its position for each
-  // iteration of the second loop. The first input resets its position at the
-  // beginning of the fourth loop. The innermost loop is an elementwise add of
-  // sections of the arrays.
-  uint8_t* output_data_ptr = output_data;
-  const uint8_t* input1_data_ptr = input1_data;
-  const uint8_t* input2_data_reset = input2_data;
-  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
-  // between input shapes. y3 for input 1 is always broadcast, and so the
-  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
-  // Put another way,
-  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
-  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
-  int y0 = params.broadcast_shape[0];
-  int y1 = params.broadcast_shape[1];
-  int y2 = params.broadcast_shape[2];
-  int y3 = params.broadcast_shape[3];
-  int y4 = params.broadcast_shape[4];
-  if (y4 > 1) {
-    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
-    // dimension.
-    for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8_t* input2_data_ptr;
-      for (int i1 = 0; i1 < y1; ++i1) {
-        input2_data_ptr = input2_data_reset;
-        for (int i2 = 0; i2 < y2; ++i2) {
-          for (int i3 = 0; i3 < y3; ++i3) {
-            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
-                           output_data_ptr);
-            input2_data_ptr += y4;
-            output_data_ptr += y4;
-          }
-          // We have broadcast y4 of input1 data y3 times, and now move on.
-          input1_data_ptr += y4;
-        }
-      }
-      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
-      input2_data_reset = input2_data_ptr;
-    }
-  } else {
-    // Special case of y4 == 1, in which the innermost loop is a single element
-    // and can be combined with the next (y3) as an inner broadcast.
-    //
-    // Note that this handles the case of pure scalar broadcast when
-    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
-    // broadcast with batch (as y2 > 1).
-    //
-    // NOTE The process is the same as the above general case except simplified
-    // for y4 == 1 and the loop over y3 is contained within the
-    // AddScalarBroadcast function.
-    for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8_t* input2_data_ptr;
-      for (int i1 = 0; i1 < y1; ++i1) {
-        input2_data_ptr = input2_data_reset;
-        for (int i2 = 0; i2 < y2; ++i2) {
-          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
-                             output_data_ptr);
-          input2_data_ptr += y3;
-          output_data_ptr += y3;
-          input1_data_ptr += 1;
-        }
-      }
-      input2_data_reset = input2_data_ptr;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
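
A minimal float-path sketch for the reference Add above. Illustrative only; it assumes the removed headers are still on the include path, and the float overload only reads the activation clamp out of ArithmeticParams.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/add.h"

int main() {
  tflite::ArithmeticParams params;
  params.float_activation_min = -10.0f;  // activation clamp used by the float path
  params.float_activation_max = 10.0f;
  const tflite::RuntimeShape shape({1, 1, 1, 4});
  const float in1[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float in2[4] = {0.5f, 0.5f, 0.5f, 0.5f};
  float out[4];  // expected: 1.5 2.5 3.5 4.5
  tflite::reference_ops::Add(params, shape, in1, shape, in2, shape, out);
  for (float v : out) std::printf("%f\n", v);
}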

+ 0 - 86
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h

@@ -1,86 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_ops {
-
-// T is expected to be either float or int.
-template <typename T>
-inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
-                 const T* const* input_data, T* output_data) {
-  // All inputs and output should have the same shape, this is checked during
-  // Prepare stage.
-  const size_t size = input_shape.FlatSize();
-  for (size_t i = 0; i < size; ++i) {
-    T x = 0;
-    for (size_t j = 0; j < num_inputs; ++j) {
-      x += input_data[j][i];
-    }
-    output_data[i] = x;
-  }
-}
-
-inline void AddN(const ArithmeticParams& params,
-                 const RuntimeShape& input_shape, const size_t num_inputs,
-                 const int8_t* const* input_data, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // Input offset is negative input zero point. Activation tensors are
-  // asymmetric quantized so they span the full int8 range.
-  // All inputs should have same zero-point and scale, this is checked during
-  // Prepare stage.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-
-  // All inputs and output should have the same shape, this is checked during
-  // Prepare stage.
-  const size_t size = input_shape.FlatSize();
-  for (size_t i = 0; i < size; ++i) {
-    // accumulate in scaled_x before clamping to avoid overflow
-    const int32_t x = params.input1_offset;  // x = 0
-    const int32_t shifted_x = x * (1 << params.left_shift);
-    int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-        shifted_x, params.input1_multiplier, params.input1_shift);
-
-    for (size_t j = 0; j < num_inputs; ++j) {
-      const int32_t y = params.input1_offset + input_data[j][i];
-      const int32_t shifted_y = y * (1 << params.left_shift);
-      int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_y, params.input1_multiplier, params.input1_shift);
-      scaled_x += scaled_y;
-    }
-
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            scaled_x, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<int8_t>(clamped_output);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
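
Similarly, a short sketch of the unquantized AddN path above (illustrative, with the same include-path assumption); all inputs must share one shape, which the real kernel checks during its Prepare stage.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/add_n.h"

int main() {
  const tflite::RuntimeShape shape({1, 1, 1, 3});
  const float a[3] = {1.0f, 2.0f, 3.0f};
  const float b[3] = {10.0f, 20.0f, 30.0f};
  const float c[3] = {0.5f, 0.5f, 0.5f};
  const float* inputs[3] = {a, b, c};
  float out[3];  // expected: 11.5 22.5 33.5
  tflite::reference_ops::AddN(shape, 3, inputs, out);
  for (float v : out) std::printf("%f\n", v);
}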

+ 0 - 88
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h

@@ -1,88 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
-
-#include <functional>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
-  if (is_arg_max) {
-    return std::greater<T>();
-  } else {
-    return std::less<T>();
-  }
-}
-
-template <typename T1, typename T2, typename T3, typename Cmp>
-void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
-               const T3* input2_data, const RuntimeShape& output_shape,
-               T2* output_data, const Cmp& cmp) {
-  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
-  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
-                   output_shape.DimensionsCount());
-  int axis = input2_data[0];
-  if (axis < 0) {
-    axis += input1_shape.DimensionsCount();
-  }
-  const int axis_size = input1_shape.Dims(axis);
-
-  int outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
-    outer_size *= input1_shape.Dims(i);
-  }
-
-  int inner_size = 1;
-  const int dims_count = input1_shape.DimensionsCount();
-  for (int i = axis + 1; i < dims_count; ++i) {
-    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
-    inner_size *= input1_shape.Dims(i);
-  }
-  for (int outer = 0; outer < outer_size; ++outer) {
-    for (int inner = 0; inner < inner_size; ++inner) {
-      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
-      T2 min_max_index = 0;
-      for (int i = 1; i < axis_size; ++i) {
-        const auto& curr_value =
-            input1_data[(outer * axis_size + i) * inner_size + inner];
-        if (cmp(curr_value, min_max_value)) {
-          min_max_value = curr_value;
-          min_max_index = static_cast<T2>(i);
-        }
-      }
-      output_data[outer * inner_size + inner] = min_max_index;
-    }
-  }
-}
-
-template <typename T1, typename T2, typename T3>
-void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
-               const T3* input2_data, const RuntimeShape& output_shape,
-               T2* output_data, const bool is_arg_max) {
-  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
-            GetComparefunction<T1>(is_arg_max));
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
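
A sketch of an arg-max over the last axis with the reference kernel above. Illustrative only; the int32 axis and output types are an assumption of this example, not something the template requires.

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"

int main() {
  const tflite::RuntimeShape input_shape({2, 3});
  const tflite::RuntimeShape output_shape({2});  // one index per row
  const float input[6] = {0.1f, 0.9f, 0.3f,   // row 0: max at index 1
                          0.7f, 0.2f, 0.8f};  // row 1: max at index 2
  const int32_t axis[1] = {1};                // reduce along the last dimension
  int32_t output[2];
  tflite::reference_ops::ArgMinMax(input_shape, input, axis, output_shape,
                                   output, /*is_arg_max=*/true);
  std::printf("%d %d\n", static_cast<int>(output[0]),
              static_cast<int>(output[1]));
}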

+ 0 - 275
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h

@@ -1,275 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
-
-#include <algorithm>
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-namespace batch_matmul {
-
-// Determine which dimension is the broadcast dimension.
-inline int broadcast_dim(int lhs_dim, int rhs_dim) {
-  if (lhs_dim == rhs_dim) return lhs_dim;
-  if (lhs_dim == 1) return rhs_dim;
-  TFLITE_DCHECK_EQ(rhs_dim, 1);
-  return lhs_dim;
-}
-
-// Compute the "extent" for iterating on this dimension.
-// If we are broadcasting, then don't advance (i.e. return 0).
-inline int extent(const RuntimeShape& shape, int x) {
-  if (shape.Dims(x) == 1) {
-    return 0;
-  }
-  int prod = 1;
-  for (int i = x + 1; i < shape.DimensionsCount(); ++i) {
-    prod *= shape.Dims(i);
-  }
-  return prod;
-}
-
-}  // namespace batch_matmul
-
-template <typename Ta, typename Tb, typename Tout>
-inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
-                        const RuntimeShape& rhs_shape, const Tb* rhs_data,
-                        const RuntimeShape& output_shape, Tout* output_data) {
-  const RuntimeShape extended_lhs_shape =
-      RuntimeShape::ExtendedShape(5, lhs_shape);
-  const RuntimeShape extended_rhs_shape =
-      RuntimeShape::ExtendedShape(5, rhs_shape);
-
-  const int batch_dim0 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
-  const int batch_dim1 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
-  const int batch_dim2 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
-
-  const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
-  const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
-  const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
-  const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
-  const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
-  const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
-
-  // Set params for each matrix multiply.
-  const int lhs_rows = extended_lhs_shape.Dims(3);
-  const int rhs_cols = extended_rhs_shape.Dims(4);
-  const int accum_depth = extended_lhs_shape.Dims(4);
-
-  for (int b0 = 0; b0 < batch_dim0; ++b0) {
-    const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
-    const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
-    for (int b1 = 0; b1 < batch_dim1; ++b1) {
-      const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
-      const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
-      for (int b2 = 0; b2 < batch_dim2; ++b2) {
-        const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
-        const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
-        Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
-                                       b1 * batch_dim2 + b2) *
-                                          lhs_rows * rhs_cols;
-        for (int j = 0; j < rhs_cols; ++j) {
-          for (int i = 0; i < lhs_rows; ++i) {
-            Tout total = 0;
-            for (int k = 0; k < accum_depth; ++k) {
-              total += static_cast<Tout>(lhs_ptr2[accum_depth * i + k]) *
-                       static_cast<Tout>(rhs_ptr2[j * accum_depth + k]);
-            }
-            int idx = lhs_rows * j + i;
-            out_ptr[idx] = total;
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
-                        const RuntimeShape& rhs_shape, const int8_t* rhs_data,
-                        const float* scaling_factors,
-                        const int32_t* input_offset, int32_t* row_sums,
-                        const RuntimeShape& output_shape, float* output_data,
-                        bool* compute_row_sums) {
-  const RuntimeShape extended_lhs_shape =
-      RuntimeShape::ExtendedShape(5, lhs_shape);
-  const RuntimeShape extended_rhs_shape =
-      RuntimeShape::ExtendedShape(5, rhs_shape);
-
-  const int batch_dim0 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
-  const int batch_dim1 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
-  const int batch_dim2 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
-
-  const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
-  const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
-  const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
-  const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
-  const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
-  const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
-
-  // Set params for each matrix multiply.
-  const int lhs_rows = extended_lhs_shape.Dims(3);
-  const int rhs_cols = extended_rhs_shape.Dims(4);
-  const int accum_depth = extended_lhs_shape.Dims(4);
-
-  const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols;
-  const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols;
-  const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols;
-  const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows;
-  const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows;
-  const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows;
-
-  if (!compute_row_sums || *compute_row_sums) {
-    int num_weights_matrices = 1;
-    for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) {
-      num_weights_matrices *= extended_lhs_shape.Dims(i);
-    }
-    tensor_utils::ReductionSumVector(
-        lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth);
-    if (compute_row_sums) {
-      *compute_row_sums = false;
-    }
-  }
-
-  for (int b0 = 0; b0 < batch_dim0; ++b0) {
-    const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
-    const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
-    const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0);
-    const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0);
-    const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0);
-    for (int b1 = 0; b1 < batch_dim1; ++b1) {
-      const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
-      const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
-      const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1);
-      const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1);
-      const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1);
-      for (int b2 = 0; b2 < batch_dim2; ++b2) {
-        const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
-        const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
-        const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2);
-        const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2);
-        const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2);
-        float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
-                                        b1 * batch_dim2 + b2) *
-                                           lhs_rows * rhs_cols;
-        for (int j = 0; j < rhs_cols; ++j) {
-          const float batch_scaling_factor = scale_ptr2[j];
-          const float batch_offset = static_cast<float>(ioff_ptr2[j]);
-          for (int i = 0; i < lhs_rows; ++i) {
-            int32_t total = 0;
-            for (int k = 0; k < accum_depth; ++k) {
-              total +=
-                  lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k];
-            }
-            int32_t row_sum = woff_ptr2[i];
-            total -= row_sum * batch_offset;
-            int idx = lhs_rows * j + i;
-            out_ptr[idx] += batch_scaling_factor * total;
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename T, typename AccumT>
-inline void BatchMatMul(const FullyConnectedParams& params,
-                        const RuntimeShape& lhs_shape, const T* lhs_data,
-                        const RuntimeShape& rhs_shape, const T* rhs_data,
-                        const RuntimeShape& output_shape, T* output_data) {
-  const RuntimeShape extended_lhs_shape =
-      RuntimeShape::ExtendedShape(5, lhs_shape);
-  const RuntimeShape extended_rhs_shape =
-      RuntimeShape::ExtendedShape(5, rhs_shape);
-
-  const int batch_dim0 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
-  const int batch_dim1 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
-  const int batch_dim2 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
-
-  const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
-  const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
-  const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
-  const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
-  const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
-  const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
-
-  // Set params for each matrix multiply.
-  const int lhs_rows = extended_lhs_shape.Dims(3);
-  const int rhs_cols = extended_rhs_shape.Dims(4);
-  const int accum_depth = extended_lhs_shape.Dims(4);
-
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  for (int b0 = 0; b0 < batch_dim0; ++b0) {
-    const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
-    const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
-    for (int b1 = 0; b1 < batch_dim1; ++b1) {
-      const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
-      const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
-      for (int b2 = 0; b2 < batch_dim2; ++b2) {
-        const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
-        const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
-        T* out_ptr = output_data +
-                     ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
-                         lhs_rows * rhs_cols;
-
-        for (int j = 0; j < rhs_cols; ++j) {
-          for (int i = 0; i < lhs_rows; ++i) {
-            AccumT total = 0;
-            for (int k = 0; k < accum_depth; ++k) {
-              AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
-              AccumT rhs_val = rhs_ptr2[accum_depth * j + k];
-              total += (lhs_val + filter_offset) * (rhs_val + input_offset);
-            }
-            int32_t total_scaled = MultiplyByQuantizedMultiplier(
-                total, output_multiplier, output_shift);
-            total_scaled += output_offset;
-            total_scaled = std::max(total_scaled, output_activation_min);
-            total_scaled = std::min(total_scaled, output_activation_max);
-            const int idx = lhs_rows * j + i;
-            out_ptr[idx] = static_cast<T>(total_scaled);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
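
The overloads above index the right-hand side as [cols, depth] and write each batch's output with idx = lhs_rows * j + i, i.e. column-major, which is worth keeping in mind when lining up operands. To stay layout-agnostic, this illustrative sketch (same include-path assumption) reduces everything to a single dot product.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"

int main() {
  // One batch, a 1x3 LHS and a 3x1 RHS: the result is a plain dot product.
  const tflite::RuntimeShape lhs_shape({1, 1, 1, 1, 3});
  const tflite::RuntimeShape rhs_shape({1, 1, 1, 3, 1});
  const tflite::RuntimeShape out_shape({1, 1, 1, 1, 1});
  const float lhs[3] = {1.0f, 2.0f, 3.0f};
  const float rhs[3] = {4.0f, 5.0f, 6.0f};
  float out[1];  // expected: 1*4 + 2*5 + 3*6 = 32
  tflite::reference_ops::BatchMatMul(lhs_shape, lhs, rhs_shape, rhs,
                                     out_shape, out);
  std::printf("%f\n", out[0]);
}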

+ 0 - 101
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h

@@ -1,101 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
-
-#include <cmath>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-// TODO(b/135760455): Move this method anonymous namespace in a cc file.
-inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) {
-  if (shape.DimensionsCount() == 4) {
-    return shape;
-  }
-  RuntimeShape new_shape(4, 1);
-  new_shape.SetDim(0, shape.Dims(0));
-  new_shape.SetDim(1, shape.Dims(1));
-  new_shape.SetDim(3, shape.Dims(2));
-  return new_shape;
-}
-
-template <typename T>
-inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape,
-                           const T* input1_data,
-                           const RuntimeShape& unextended_input2_shape,
-                           const int32_t* block_shape_data,
-                           const RuntimeShape& unextended_input3_shape,
-                           const int32_t* crops_data,
-                           const RuntimeShape& unextended_output_shape,
-                           T* output_data) {
-  ruy::profiler::ScopeLabel label("BatchToSpaceND");
-  TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3);
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(),
-                   unextended_output_shape.DimensionsCount());
-
-  const RuntimeShape input1_shape =
-      ExtendShapeBatchToSpace(unextended_input1_shape);
-  const RuntimeShape output_shape =
-      ExtendShapeBatchToSpace(unextended_output_shape);
-
-  const int output_width = output_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_batch_size = output_shape.Dims(0);
-
-  const int depth = input1_shape.Dims(3);
-  const int input_width = input1_shape.Dims(2);
-  const int input_height = input1_shape.Dims(1);
-  const int input_batch_size = input1_shape.Dims(0);
-
-  const int block_shape_height = block_shape_data[0];
-  const int block_shape_width =
-      unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1;
-  const int crops_top = crops_data[0];
-  const int crops_left =
-      unextended_input1_shape.DimensionsCount() == 4 ? crops_data[2] : 0;
-  for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
-    const int out_batch = in_batch % output_batch_size;
-    const int spatial_offset = in_batch / output_batch_size;
-    for (int in_h = 0; in_h < input_height; ++in_h) {
-      const int out_h = in_h * block_shape_height +
-                        spatial_offset / block_shape_width - crops_top;
-      if (out_h < 0 || out_h >= output_height) {
-        continue;
-      }
-      for (int in_w = 0; in_w < input_width; ++in_w) {
-        const int out_w = in_w * block_shape_width +
-                          spatial_offset % block_shape_width - crops_left;
-
-        if (out_w < 0 || out_w >= output_width) {
-          continue;
-        }
-        T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
-        const T* in =
-            input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
-        memcpy(out, in, depth * sizeof(T));
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
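
Note (not part of the diff): BatchToSpaceND above maps every input batch to a spatial block in the output: out_batch = in_batch % output_batch_size, and the quotient selects the block row/column via block_shape. The tiny sketch below reproduces that index arithmetic for block_shape = {2, 2} with no crops; it is illustrative only.

    #include <cstdio>

    // Reproduces the BatchToSpaceND index mapping for a 4x1x1xC input and a
    // 1x2x2xC output (block_shape = {2, 2}, crops = 0), with in_h = in_w = 0.
    int main() {
      const int input_batch_size = 4, output_batch_size = 1;
      const int block_h = 2, block_w = 2, crops_top = 0, crops_left = 0;
      for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
        const int spatial_offset = in_batch / output_batch_size;
        const int out_h = 0 * block_h + spatial_offset / block_w - crops_top;
        const int out_w = 0 * block_w + spatial_offset % block_w - crops_left;
        std::printf("input batch %d -> output (h=%d, w=%d)\n", in_batch, out_h, out_w);
      }
      return 0;
    }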

+ 0 - 91
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h

@@ -1,91 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Also appears to duplicate MinimumMaximum.
-//
-// R: Result type. T1: Input 1 type. T2: Input 2 type.
-template <typename R, typename T1, typename T2>
-inline void BroadcastBinaryFunction4DSlow(
-    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
-    const RuntimeShape& unextended_output_shape, R* output_data,
-    R (*func)(T1, T2)) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-
-  const int* dims_data =
-      reinterpret_cast<const int*>(output_shape.DimsDataUpTo5D());
-  for (int b = 0; b < output_shape.Dims(0); ++b) {
-    int out_idx_b = b * dims_data[1];
-    int in_idx1_b = desc1.strides[0] * b;
-    int in_idx2_b = desc2.strides[0] * b;
-    for (int y = 0; y < output_shape.Dims(1); ++y) {
-      int out_idx_y = (out_idx_b + y) * dims_data[2];
-      int in_idx1_y = in_idx1_b + desc1.strides[1] * y;
-      int in_idx2_y = in_idx2_b + desc2.strides[1] * y;
-      for (int x = 0; x < output_shape.Dims(2); ++x) {
-        int out_idx_x = (out_idx_y + x) * dims_data[3];
-        int in1_idx = in_idx1_y + desc1.strides[2] * x;
-        int in2_idx = in_idx2_y + desc2.strides[2] * x;
-        for (int c = 0; c < output_shape.Dims(3); ++c) {
-          auto out_idx = out_idx_x + c;
-          auto in1_val = input1_data[in1_idx];
-          auto in2_val = input2_data[in2_idx];
-          output_data[out_idx] = func(in1_val, in2_val);
-          in1_idx += desc1.strides[3];
-          in2_idx += desc2.strides[3];
-        }
-      }
-    }
-  }
-}
-
-// R: Result type. T1: Input 1 type. T2: Input 2 type.
-template <typename R, typename T1, typename T2>
-inline void BinaryFunction(const RuntimeShape& input1_shape,
-                           const T1* input1_data,
-                           const RuntimeShape& input2_shape,
-                           const T2* input2_data,
-                           const RuntimeShape& output_shape, R* output_data,
-                           R (*func)(T1, T2)) {
-  const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = func(input1_data[i], input2_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
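
Note (not part of the diff): BinaryFunction above is the flat, non-broadcast path: it applies a plain function pointer elementwise over inputs of matching size. A self-contained sketch of that pattern over raw arrays follows (ApplyElementwise is an illustrative name, not a TFLite API).

    #include <cstdio>

    // Elementwise application of a function pointer, mirroring the removed
    // BinaryFunction's flat path.
    template <typename R, typename T1, typename T2>
    void ApplyElementwise(const T1* in1, const T2* in2, R* out, int n,
                          R (*func)(T1, T2)) {
      for (int i = 0; i < n; ++i) out[i] = func(in1[i], in2[i]);
    }

    int main() {
      const float a[3] = {1.f, -2.f, 3.f};
      const float b[3] = {0.5f, 4.f, -1.f};
      float c[3];
      ApplyElementwise<float, float, float>(
          a, b, c, 3, [](float x, float y) { return x * y; });
      std::printf("%g %g %g\n", c[0], c[1], c[2]);  // 0.5 -8 -3
      return 0;
    }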

+ 0 - 56
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h

@@ -1,56 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
-                   const RuntimeShape& input2_shape, const T* input2_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  // Gets data at the backward index i of the shape tensor. Returns 1 if the
-  // index is out of range.
-  auto get_shape_data = [](const RuntimeShape& shape, const T* data,
-                           int backward_idx) -> T {
-    int forward_idx = shape.FlatSize() - 1 - backward_idx;
-    if (forward_idx < 0) return 1;
-    return data[forward_idx];
-  };
-
-  int output_num_elements = output_shape.FlatSize();
-  for (int i = 0; i < output_num_elements; ++i) {
-    int backward_i = output_num_elements - 1 - i;
-    int shape1_i = get_shape_data(input1_shape, input1_data, i);
-    int shape2_i = get_shape_data(input2_shape, input2_data, i);
-    if (shape1_i == 1) {
-      output_data[backward_i] = shape2_i;
-    } else if (shape2_i == 1) {
-      output_data[backward_i] = shape1_i;
-    } else {
-      TFLITE_CHECK_EQ(shape1_i, shape2_i);
-      output_data[backward_i] = shape1_i;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
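
Note (not part of the diff): BroadcastArgs above walks both shape tensors from their last dimension, treats missing leading dimensions as 1, and requires matching sizes unless one side is 1. A small sketch of the same rule over std::vector (BroadcastShapes is an illustrative helper):

    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Right-aligned broadcast of two shapes: a missing dimension counts as 1,
    // and a dimension of 1 yields to the other side.
    std::vector<int> BroadcastShapes(const std::vector<int>& a,
                                     const std::vector<int>& b) {
      const size_t n = std::max(a.size(), b.size());
      std::vector<int> out(n);
      for (size_t i = 0; i < n; ++i) {  // i indexes from the last dimension
        const int da = i < a.size() ? a[a.size() - 1 - i] : 1;
        const int db = i < b.size() ? b[b.size() - 1 - i] : 1;
        assert(da == db || da == 1 || db == 1);
        out[n - 1 - i] = (da == 1) ? db : da;
      }
      return out;
    }

    int main() {
      const std::vector<int> s = BroadcastShapes({3, 1, 2}, {2, 1});  // {3, 2, 2}
      std::printf("%d %d %d\n", s[0], s[1], s[2]);
      return 0;
    }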

+ 0 - 97
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h

@@ -1,97 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-
-namespace tflite {
-namespace reference_ops {
-template <int N>
-void BroadcastImpl(const NdArrayDesc<N>& input_desc, const char* input_data,
-                   const NdArrayDesc<N>& output_desc, char* output_data,
-                   int indexes[N], int dim, const int last_broadcasting_dim,
-                   const int type_size) {
-  // Copy data from input to output.
-  if (dim == last_broadcasting_dim) {
-    int copy_size = output_desc.strides[dim] * type_size;
-    const char* data_src =
-        input_data + SubscriptToIndex(input_desc, indexes) * type_size;
-    char* data_dst =
-        output_data + SubscriptToIndex(output_desc, indexes) * type_size;
-    for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
-      memcpy(data_dst, data_src, copy_size);
-    }
-    return;
-  }
-
-  // Recursive call to find the next broadcasting dimension.
-  for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim];
-       ++indexes[dim]) {
-    BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes,
-                     dim + 1, last_broadcasting_dim, type_size);
-  }
-
-  // Duplicate data in output tensor.
-  indexes[dim] = 0;
-  if (input_desc.extents[dim] != output_desc.extents[dim]) {
-    int copy_size = output_desc.strides[dim] * type_size;
-    char* data_src =
-        output_data + SubscriptToIndex(output_desc, indexes) * type_size;
-    char* data_dst = data_src + copy_size;
-    for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
-      memcpy(data_dst, data_src, copy_size);
-    }
-  }
-}
-
-template <int N>
-inline void BroadcastTo(const RuntimeShape& unextended_input_shape,
-                        const char* input_data,
-                        const RuntimeShape& unextended_output_shape,
-                        char* output_data, TfLiteType data_type) {
-  NdArrayDesc<N> input_desc;
-  NdArrayDesc<N> output_desc;
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
-                 &input_desc);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                 &output_desc);
-
-  // Find the last dimension that has broadcasting. At this dimension, the data
-  // is copied from the input tensor to the output tensor.
-  int last_broadcast_dim = -1;
-  for (int i = N - 1; i >= 0; --i) {
-    if (input_desc.extents[i] != output_desc.extents[i]) {
-      last_broadcast_dim = i;
-      break;
-    }
-  }
-
-  // If non-broadcasting, just copy data from input to output tensor.
-  if (last_broadcast_dim == -1) {
-    memcpy(output_data, input_data,
-           unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type));
-    return;
-  }
-
-  // Broadcasting using memcpy.
-  int indexes[N] = {0};
-  BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes, 0,
-                   last_broadcast_dim, TfLiteTypeGetSize(data_type));
-}
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
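
Note (not part of the diff): BroadcastTo above copies whole contiguous runs with memcpy at the innermost broadcasting dimension and then duplicates them along outer broadcasting dimensions. The minimal sketch below shows the same memcpy-based duplication for a 1x3 row broadcast to 4x3 (illustrative only):

    #include <cstdio>
    #include <cstring>

    // Broadcast a single row into several rows by repeated memcpy, the core
    // trick behind the removed BroadcastTo.
    int main() {
      const float row[3] = {1.f, 2.f, 3.f};
      float out[4][3];
      for (int r = 0; r < 4; ++r) {
        std::memcpy(out[r], row, sizeof(row));
      }
      std::printf("%g %g %g\n", out[3][0], out[3][1], out[3][2]);  // 1 2 3
      return 0;
    }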

+ 0 - 37
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h

@@ -1,37 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = std::ceil(input_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_

+ 0 - 280
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h

@@ -1,280 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline bool EqualFn(T lhs, T rhs) {
-  return lhs == rhs;
-}
-
-template <typename T>
-inline bool NotEqualFn(T lhs, T rhs) {
-  return lhs != rhs;
-}
-
-template <typename T>
-inline bool GreaterFn(T lhs, T rhs) {
-  return lhs > rhs;
-}
-template <typename T>
-inline bool GreaterEqualFn(T lhs, T rhs) {
-  return lhs >= rhs;
-}
-template <typename T>
-inline bool LessFn(T lhs, T rhs) {
-  return lhs < rhs;
-}
-template <typename T>
-inline bool LessEqualFn(T lhs, T rhs) {
-  return lhs <= rhs;
-}
-
-template <typename T>
-using ComparisonFn = bool (*)(T, T);
-
-template <typename T, ComparisonFn<T> F>
-inline void ComparisonImpl(
-    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    output_data[i] = F(input1_data[i], input2_data[i]);
-  }
-}
-
-template <ComparisonFn<float> F>
-inline void Comparison(const ComparisonParams& op_params,
-                       const RuntimeShape& input1_shape,
-                       const float* input1_data,
-                       const RuntimeShape& input2_shape,
-                       const float* input2_data,
-                       const RuntimeShape& output_shape, bool* output_data) {
-  ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
-                           input2_data, output_shape, output_data);
-}
-
-template <typename T, ComparisonFn<int32_t> F>
-inline void ComparisonWithScaling(
-    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
-  int left_shift = op_params.left_shift;
-  int32_t input1_offset = op_params.input1_offset;
-  int32_t input1_multiplier = op_params.input1_multiplier;
-  int input1_shift = op_params.input1_shift;
-  int32_t input2_offset = op_params.input2_offset;
-  int32_t input2_multiplier = op_params.input2_multiplier;
-  int input2_shift = op_params.input2_shift;
-
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    const int32_t input1_val = input1_offset + input1_data[i];
-    const int32_t input2_val = input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, input1_multiplier, input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, input2_multiplier, input2_shift);
-    output_data[i] = F(scaled_input1_val, scaled_input2_val);
-  }
-}
-
-struct BroadcastComparison4DSlowCommon {
-  const RuntimeShape output_shape;
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-};
-
-inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
-    const RuntimeShape& unextended_input1_shape,
-    const RuntimeShape& unextended_input2_shape,
-    const RuntimeShape& unextended_output_shape) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
-          desc2};
-}
-
-template <typename T, ComparisonFn<T> F>
-inline void BroadcastComparison4DSlowImpl(
-    const ComparisonParams& op_params,
-    const RuntimeShape& unextended_input1_shape, const T* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T* input2_data,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          output_data[Offset(dims.output_shape, b, y, x, c)] =
-              F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
-                input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
-        }
-      }
-    }
-  }
-}
-
-template <ComparisonFn<float> F>
-inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
-                                      const RuntimeShape& input1_shape,
-                                      const float* input1_data,
-                                      const RuntimeShape& input2_shape,
-                                      const float* input2_data,
-                                      const RuntimeShape& output_shape,
-                                      bool* output_data) {
-  BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
-                                          input2_shape, input2_data,
-                                          output_shape, output_data);
-}
-
-template <typename T, ComparisonFn<int32_t> F>
-inline void BroadcastComparison4DSlowWithScaling(
-    const ComparisonParams& op_params,
-    const RuntimeShape& unextended_input1_shape, const T* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T* input2_data,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  int left_shift = op_params.left_shift;
-  int32_t input1_offset = op_params.input1_offset;
-  int32_t input1_multiplier = op_params.input1_multiplier;
-  int input1_shift = op_params.input1_shift;
-  int32_t input2_offset = op_params.input2_offset;
-  int32_t input2_multiplier = op_params.input2_multiplier;
-  int input2_shift = op_params.input2_shift;
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              input1_offset +
-              input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
-          const int32_t input2_val =
-              input2_offset +
-              input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
-          const int32_t shifted_input1_val = input1_val * (1 << left_shift);
-          const int32_t shifted_input2_val = input2_val * (1 << left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, input1_multiplier, input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, input2_multiplier, input2_shift);
-          output_data[Offset(dims.output_shape, b, y, x, c)] =
-              F(scaled_input1_val, scaled_input2_val);
-        }
-      }
-    }
-  }
-}
-
-#define TFLITE_COMPARISON_OP(name)                                             \
-  inline void name(const ComparisonParams& op_params,                          \
-                   const RuntimeShape& input1_shape, const float* input1_data, \
-                   const RuntimeShape& input2_shape, const float* input2_data, \
-                   const RuntimeShape& output_shape, bool* output_data) {      \
-    Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape,   \
-                         input2_data, output_shape, output_data);              \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name##NoScaling(                                                 \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data,          \
-                                input2_shape, input2_data, output_shape,       \
-                                output_data);                                  \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name##WithScaling(                                               \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data,   \
-                                       input2_shape, input2_data,              \
-                                       output_shape, output_data);             \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast4DSlow##name##NoScaling(                                \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlowImpl<T, name##Fn>(                                \
-        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
-        output_shape, output_data);                                            \
-  }                                                                            \
-  inline void Broadcast4DSlow##name(                                           \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const float* input1_data, const RuntimeShape& input2_shape,              \
-      const float* input2_data, const RuntimeShape& output_shape,              \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data,  \
-                                        input2_shape, input2_data,             \
-                                        output_shape, output_data);            \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast4DSlow##name##WithScaling(                              \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlowWithScaling<T, name##Fn>(                         \
-        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
-        output_shape, output_data);                                            \
-  }
-TFLITE_COMPARISON_OP(Equal);
-TFLITE_COMPARISON_OP(NotEqual);
-TFLITE_COMPARISON_OP(Greater);
-TFLITE_COMPARISON_OP(GreaterEqual);
-TFLITE_COMPARISON_OP(Less);
-TFLITE_COMPARISON_OP(LessEqual);
-#undef TFLITE_COMPARISON_OP
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
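
Note (not part of the diff): the WithScaling comparison variants above rescale both quantized inputs onto a common intermediate scale before applying the comparison, so operands quantized with different parameters compare correctly. A hedged sketch of the underlying idea, using a float dequantization in place of the kernel's fixed-point rescale (QuantizedLess is an illustrative name):

    #include <cstdint>
    #include <cstdio>

    // Compare two quantized values by first mapping them back to real values.
    // The removed kernel achieves the same effect with fixed-point multipliers.
    bool QuantizedLess(int8_t a, int32_t a_zero, float a_scale,
                       int8_t b, int32_t b_zero, float b_scale) {
      const float real_a = (a - a_zero) * a_scale;
      const float real_b = (b - b_zero) * b_scale;
      return real_a < real_b;
    }

    int main() {
      // 0.5 (scale 0.1, zero point 0) vs 0.3 (scale 0.05, zero point 0).
      std::printf("%d\n", QuantizedLess(5, 0, 0.1f, 6, 0, 0.05f));  // prints 0
      return 0;
    }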

+ 0 - 141
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h

@@ -1,141 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename Scalar>
-inline void Concatenation(const ConcatenationParams& params,
-                          const RuntimeShape* const* input_shapes,
-                          const Scalar* const* input_data,
-                          const RuntimeShape& output_shape,
-                          Scalar* output_data) {
-  int axis = params.axis;
-  int inputs_count = params.inputs_count;
-  const int concat_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LT(axis, concat_dimensions);
-
-  int64_t concat_size = 0;
-  for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
-    for (int j = 0; j < concat_dimensions; j++) {
-      if (j != axis) {
-        MatchingDim(*input_shapes[i], j, output_shape, j);
-      }
-    }
-    concat_size += input_shapes[i]->Dims(axis);
-  }
-  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
-  int64_t outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= output_shape.Dims(i);
-  }
-  // For all input arrays,
-  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
-  int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < concat_dimensions; ++i) {
-    base_inner_size *= output_shape.Dims(i);
-  }
-
-  Scalar* output_ptr = output_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
-      const Scalar* input_ptr = input_data[i] + k * copy_size;
-      memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
-      output_ptr += copy_size;
-    }
-  }
-}
-
-// TODO(b/174275780): The quantized implementation of concatenation isn't fully
-// quantized as it takes scale as a floating point value. This should be fixed
-// when optimizing this routine further.
-inline void ConcatenationWithScaling(const ConcatenationParams& params,
-                                     const RuntimeShape* const* input_shapes,
-                                     const uint8_t* const* input_data,
-                                     const RuntimeShape& output_shape,
-                                     uint8_t* output_data) {
-  int axis = params.axis;
-  const int32_t* input_zeropoint = params.input_zeropoint;
-  const float* input_scale = params.input_scale;
-  int inputs_count = params.inputs_count;
-  const int32_t output_zeropoint = params.output_zeropoint;
-  const float output_scale = params.output_scale;
-
-  const int concat_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LT(axis, concat_dimensions);
-
-  int64_t concat_size = 0;
-  for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
-    for (int j = 0; j < concat_dimensions; j++) {
-      if (j != axis) {
-        MatchingDim(*input_shapes[i], j, output_shape, j);
-      }
-    }
-    concat_size += input_shapes[i]->Dims(axis);
-  }
-  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
-  int64_t outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= output_shape.Dims(i);
-  }
-  // For all input arrays,
-  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
-  int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < concat_dimensions; ++i) {
-    base_inner_size *= output_shape.Dims(i);
-  }
-
-  const float inverse_output_scale = 1.f / output_scale;
-  uint8_t* output_ptr = output_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
-      const uint8_t* input_ptr = input_data[i] + k * copy_size;
-      if (input_zeropoint[i] == output_zeropoint &&
-          input_scale[i] == output_scale) {
-        memcpy(output_ptr, input_ptr, copy_size);
-      } else {
-        const float scale = input_scale[i] * inverse_output_scale;
-        const float bias = -input_zeropoint[i] * scale;
-        for (int j = 0; j < copy_size; ++j) {
-          const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
-                                    input_ptr[j] * scale + bias)) +
-                                output_zeropoint;
-          output_ptr[j] = static_cast<uint8_t>(
-              std::max<int32_t>(std::min<int32_t>(255, value), 0));
-        }
-      }
-      output_ptr += copy_size;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
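
Note (not part of the diff): ConcatenationWithScaling above copies a slice verbatim when an input shares the output's scale and zero point, and otherwise requantizes each value as round(q * s_in/s_out - zp_in * s_in/s_out) + zp_out, clamped to [0, 255]. A standalone sketch of that per-value step (Requantize is an illustrative name):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Requantize one uint8 value from (s_in, zp_in) to (s_out, zp_out), as in
    // the mismatched-quantization branch of the removed kernel.
    uint8_t Requantize(uint8_t q_in, int32_t zp_in, float s_in,
                       int32_t zp_out, float s_out) {
      const float scale = s_in / s_out;
      const float bias = -zp_in * scale;
      const int32_t v =
          static_cast<int32_t>(std::round(q_in * scale + bias)) + zp_out;
      return static_cast<uint8_t>(std::max(0, std::min(255, v)));
    }

    int main() {
      // Input scale 0.02, zero point 128; output scale 0.04, zero point 128.
      std::printf("%d\n", Requantize(200, 128, 0.02f, 128, 0.04f));  // 164
      return 0;
    }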

+ 0 - 287
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h

@@ -1,287 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
-                 const float* input_data, const RuntimeShape& filter_shape,
-                 const float* filter_data, const RuntimeShape& bias_shape,
-                 const float* bias_data, const RuntimeShape& output_shape,
-                 float* output_data, const RuntimeShape& im2col_shape,
-                 float* im2col_data) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          float total = 0.f;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                float input_value =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                float filter_value = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                total += (input_value * filter_value);
-              }
-            }
-          }
-          float bias_value = 0.0f;
-          if (bias_data) {
-            bias_value = bias_data[out_channel];
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              ActivationFunctionWithMinMax(total + bias_value,
-                                           output_activation_min,
-                                           output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
-                 const uint8_t* input_data, const RuntimeShape& filter_shape,
-                 const uint8_t* filter_data, const RuntimeShape& bias_shape,
-                 const int32_t* bias_data, const RuntimeShape& output_shape,
-                 uint8_t* output_data, const RuntimeShape& im2col_shape,
-                 uint8_t* im2col_data, void* cpu_backend_context) {
-  (void)cpu_backend_context;  // only used in optimized code.
-  (void)im2col_data;          // only used in optimized code.
-  (void)im2col_shape;         // only used in optimized code.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                int32_t input_val =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                int32_t filter_val = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                acc +=
-                    (filter_val + filter_offset) * (input_val + input_offset);
-              }
-            }
-          }
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<uint8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-inline void HybridConvPerChannel(
-    const ConvParams& params, float* scaling_factors_ptr,
-    const RuntimeShape& input_shape, const int8_t* input_data,
-    const RuntimeShape& filter_shape, const int8_t* filter_data,
-    const RuntimeShape& bias_shape, const float* bias_data,
-    const RuntimeShape& output_shape, float* output_data,
-    const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    const float* per_channel_scale, int32_t* input_offset) {
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x,
-                      in_channel + group * filter_input_depth)];
-                  int32_t filter_val =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  acc += filter_val * (input_val - input_offset[batch]);
-                }
-              }
-            }
-          }
-          float acc_float =
-              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
-          if (bias_data) {
-            acc_float += bias_data[out_channel];
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              ActivationFunctionWithMinMax(acc_float, output_activation_min,
-                                           output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
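
Note (not part of the diff): all three Conv variants above share the same nested loop: for every output element, slide the filter over the (possibly dilated, padded) input window and accumulate input x filter products, with group = out_channel / filters_per_group selecting which input-channel slice is read. The tiny sketch below computes one output element of the float path for a 3x3 input and 2x2 filter, stride 1, no padding (illustrative only):

    #include <cstdio>

    // One output element of a float convolution at (out_y, out_x) = (0, 0),
    // single input/output channel, matching the accumulation in the removed Conv.
    int main() {
      const float input[3][3] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
      const float filter[2][2] = {{1, 0}, {0, 1}};
      float total = 0.f;
      for (int fy = 0; fy < 2; ++fy) {
        for (int fx = 0; fx < 2; ++fx) {
          total += input[0 + fy][0 + fx] * filter[fy][fx];
        }
      }
      std::printf("output(0,0) = %g\n", total);  // 1*1 + 5*1 = 6
      return 0;
    }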

+ 0 - 175
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h

@@ -1,175 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
-
-#include <algorithm>
-#include <cstdint>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
-                   bool exclusive, bool reverse, T* output_data) {
-  const int32_t rank = shape.DimensionsCount();
-  TFLITE_DCHECK_GE(rank, 1);
-  TFLITE_DCHECK_GE(axis, 0);
-  TFLITE_DCHECK_LT(axis, rank);
-
-  size_t inner = 1;
-  size_t outer = 1;
-  size_t depth = 1;
-  for (int32_t i = 0; i < rank; i++) {
-    if (i < axis)
-      inner *= shape.Dims(i);
-    else if (i > axis)
-      outer *= shape.Dims(i);
-    else
-      depth = shape.Dims(i);
-  }
-
-  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
-    size_t outer_index_adj;
-    if (reverse)
-      outer_index_adj = (outer - 1) - outer_index;
-    else
-      outer_index_adj = outer_index;
-    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
-      T accumulator = 0;
-      size_t inner_index_adj;
-      if (reverse)
-        inner_index_adj = (inner - 1) - inner_index;
-      else
-        inner_index_adj = inner_index;
-      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
-        size_t depth_index_adj;
-        if (reverse)
-          depth_index_adj = (depth - 1) - depth_index;
-        else
-          depth_index_adj = depth_index;
-
-        size_t index = outer_index_adj;
-        index += inner_index_adj * depth * outer;
-        index += depth_index_adj * outer;
-
-        if (exclusive) {
-          output_data[index] = accumulator;
-          accumulator += input_data[index];
-        } else {
-          accumulator += input_data[index];
-          output_data[index] = accumulator;
-        }
-      }
-    }
-  }
-}
-
-//
-// Quantized INT8 CUMSUM
-//
-inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
-                   const RuntimeShape& shape, int32_t axis, bool exclusive,
-                   bool reverse, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // Input offset is negative input zero point. Activation tensors are
-  // asymmetric quantized so they span the full int8 range.
-  // All inputs should have same zero-point and scale, this is checked during
-  // Prepare stage.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-
-  const int32_t rank = shape.DimensionsCount();
-  TFLITE_DCHECK_GE(rank, 1);
-  TFLITE_DCHECK_GE(axis, 0);
-  TFLITE_DCHECK_LT(axis, rank);
-
-  size_t inner = 1;
-  size_t outer = 1;
-  size_t depth = 1;
-  for (int32_t i = 0; i < rank; i++) {
-    if (i < axis)
-      inner *= shape.Dims(i);
-    else if (i > axis)
-      outer *= shape.Dims(i);
-    else
-      depth = shape.Dims(i);
-  }
-
-  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
-    size_t outer_index_adj;
-    if (reverse)
-      outer_index_adj = (outer - 1) - outer_index;
-    else
-      outer_index_adj = outer_index;
-    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
-      int32_t accumulator = params.input1_offset;  // accumulator = 0
-      accumulator *= (1 << params.left_shift);
-      accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          accumulator, params.input1_multiplier, params.input1_shift);
-
-      size_t inner_index_adj;
-      if (reverse)
-        inner_index_adj = (inner - 1) - inner_index;
-      else
-        inner_index_adj = inner_index;
-
-      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
-        size_t depth_index_adj;
-        if (reverse)
-          depth_index_adj = (depth - 1) - depth_index;
-        else
-          depth_index_adj = depth_index;
-
-        size_t index = outer_index_adj;
-        index += inner_index_adj * depth * outer;
-        index += depth_index_adj * outer;
-
-        const int32_t y = params.input1_offset + input_data[index];
-        const int32_t shifted_y = y * (1 << params.left_shift);
-        const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_y, params.input1_multiplier, params.input1_shift);
-
-        int32_t scaled_output;
-        if (exclusive) {
-          scaled_output = accumulator;
-          accumulator += scaled_y;
-        } else {
-          accumulator += scaled_y;
-          scaled_output = accumulator;
-        }
-
-        const int32_t raw_output =
-            MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                scaled_output, params.output_multiplier, params.output_shift) +
-            params.output_offset;
-        const int32_t clamped_output =
-            std::min(params.quantized_activation_max,
-                     std::max(params.quantized_activation_min, raw_output));
-        output_data[index] = static_cast<int8_t>(clamped_output);
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
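
Note (not part of the diff): the removed CumSum walks the selected axis keeping a running accumulator; exclusive mode writes the accumulator before adding the current element, inclusive mode after, and reverse simply flips the traversal order. A 1-D sketch of the two modes (CumSum1D is an illustrative name):

    #include <cstdio>

    // Inclusive vs. exclusive cumulative sum along one axis (no reverse).
    void CumSum1D(const int* in, int* out, int n, bool exclusive) {
      int acc = 0;
      for (int i = 0; i < n; ++i) {
        if (exclusive) { out[i] = acc; acc += in[i]; }
        else           { acc += in[i]; out[i] = acc; }
      }
    }

    int main() {
      const int x[4] = {1, 2, 3, 4};
      int inc[4], exc[4];
      CumSum1D(x, inc, 4, false);  // 1 3 6 10
      CumSum1D(x, exc, 4, true);   // 0 1 3 6
      std::printf("%d %d %d %d | %d %d %d %d\n",
                  inc[0], inc[1], inc[2], inc[3], exc[0], exc[1], exc[2], exc[3]);
      return 0;
    }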

+ 0 - 79
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h

@@ -1,79 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
-                         const RuntimeShape& unextended_input_shape,
-                         const T* input_data,
-                         const RuntimeShape& unextended_output_shape,
-                         T* output_data) {
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  const int input_depth = input_shape.Dims(3);
-  const int input_width = input_shape.Dims(2);
-  const int input_height = input_shape.Dims(1);
-  const int input_batch = input_shape.Dims(0);
-
-  const int output_depth = output_shape.Dims(3);
-  const int output_width = output_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_batch = output_shape.Dims(0);
-
-  const int32_t block_size = op_params.block_size;
-
-  TFLITE_DCHECK_EQ(input_width * block_size, output_width);
-  TFLITE_DCHECK_EQ(input_height * block_size, output_height);
-  TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
-  TFLITE_DCHECK_EQ(input_batch, output_batch);
-
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_h = 0; out_h < output_height; ++out_h) {
-      for (int out_w = 0; out_w < output_width; ++out_w) {
-        for (int out_d = 0; out_d < output_depth; ++out_d) {
-          const int in_d =
-              out_d + ((out_h % block_size) * block_size + out_w % block_size) *
-                          output_depth;
-
-          const int in_w = out_w / block_size;
-          const int in_h = out_h / block_size;
-          const int in_b = out_b;
-
-          const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
-          const int output_index =
-              Offset(output_shape, out_b, out_h, out_w, out_d);
-
-          output_data[output_index] = input_data[input_index];
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
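
For reference, a minimal standalone sketch of the index remapping the removed DepthToSpace kernel performs, using an assumed 1x1x1x4 input and block_size = 2 (all names and values here are illustrative only, not part of the library):

#include <cstdio>

// Remap a 1x1x1x4 tensor (NHWC) to 1x2x2x1 with block_size = 2, mirroring the
// in_d arithmetic of the removed reference kernel (in_h and in_w are 0 here).
int main() {
  const int block_size = 2;
  const int output_depth = 1;
  const float input[4] = {10.f, 20.f, 30.f, 40.f};  // four depth channels
  float output[4] = {};                              // one 2x2 spatial block
  for (int out_h = 0; out_h < 2; ++out_h) {
    for (int out_w = 0; out_w < 2; ++out_w) {
      const int in_d =
          ((out_h % block_size) * block_size + out_w % block_size) * output_depth;
      output[out_h * 2 + out_w] = input[in_d];
    }
  }
  std::printf("%g %g %g %g\n", output[0], output[1], output[2], output[3]);
  // Prints "10 20 30 40": the depth channels become a 2x2 spatial block.
  return 0;
}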

+ 0 - 100
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h

@@ -1,100 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void DepthwiseConv(
-    const DepthwiseParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& filter_shape,
-    const float* filter_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int b = 0; b < batches; ++b) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int ic = 0; ic < input_depth; ++ic) {
-          for (int m = 0; m < depth_multiplier; m++) {
-            const int oc = m + ic * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            float total = 0.f;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  float input_value =
-                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
-                  float filter_value = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, oc)];
-                  total += (input_value * filter_value);
-                }
-              }
-            }
-            float bias_value = 0.0f;
-            if (bias_data) {
-              bias_value = bias_data[oc];
-            }
-            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
-                ActivationFunctionWithMinMax(total + bias_value,
-                                             output_activation_min,
-                                             output_activation_max);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
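
As a worked example of the accumulate-then-clamp pattern in the removed float kernel, here is a standalone sketch of a single output element with one input channel, depth_multiplier = 1, a 2x2 filter, stride 1 and no padding (all values are invented for illustration):

#include <algorithm>
#include <cstdio>

int main() {
  const float input[2][2]  = {{1.f, 2.f}, {3.f, 4.f}};
  const float filter[2][2] = {{0.5f, 0.5f}, {0.5f, 0.5f}};
  const float bias = 1.f;
  const float act_min = 0.f, act_max = 6.f;  // ReLU6-style activation range
  float total = 0.f;
  for (int fy = 0; fy < 2; ++fy)
    for (int fx = 0; fx < 2; ++fx)
      total += input[fy][fx] * filter[fy][fx];
  // (1 + 2 + 3 + 4) * 0.5 + 1 = 6, which stays inside [0, 6] after clamping.
  const float out = std::min(act_max, std::max(act_min, total + bias));
  std::printf("out = %g\n", out);
  return 0;
}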

+ 0 - 319
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h

@@ -1,319 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
-
-#include <algorithm>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// Used in tests and template parameters to control which version of depthwise
-// convolution is called. Primarily for reference code, and specializations
-// forced in tests.
-enum class DepthwiseConvImplementation {
-  // Run all tests against kUseStandardEntry even if also testing another
-  // kernel, since we need to be sure that the main DepthwiseConv() function in
-  // optimized_ops.h dispatches to a correctly-executing kernel.
-  kNone = 0,                 // The "default" option: use the normal
-                             // DepthwiseConv kernel (entry) function.
-  kUseGenericKernel,         // Forced use of generic kernel.
-  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
-  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
-                             // when available.
-  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model that is intended
-                             // to match the overall design of the NEON code.
-  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound loops
-                             // and some arrays.
-  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
-};
-
-// Category of depthwise convolution output rounding.
-enum class DepthwiseConvOutputRounding {
-  kNone = 0,      // Invalid: specific method must be specified.
-  kAwayFromZero,  // Original method: exact halves rounded away from zero.
-  kUpward,        // Halves towards +infinity: adds 0.5 before truncate.
-  // This is where a future kNearestEven would be placed.
-};
-
-// Category of depthwise convolution depth multiplication.
-enum class DepthwiseConvDepthMultiplication {
-  kNoMultiplication = 0,  // Depth multiplier = 1.
-  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
-};
-
-namespace reference_ops {
-namespace depthwise_conv {
-
-template <DepthwiseConvOutputRounding output_rounding>
-inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
-                                  int shift) {
-  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-// Single-rounding MultiplyByQuantizedMultiplier
-#if TFLITE_SINGLE_ROUNDING
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  int left_shift = shift > 0 ? shift : 0;
-  int right_shift = shift > 0 ? 0 : -shift;
-  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
-                                 x * (1 << left_shift), quantized_multiplier),
-                             right_shift);
-}
-
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-// Double-rounding MultiplyByQuantizedMultiplier
-#else
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  const int left_shift = shift > 0 ? shift : 0;
-  const int right_shift = shift > 0 ? 0 : -shift;
-  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
-  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
-                                            quantized_multiplier) +
-          rounding_offset) >>
-         right_shift;
-}
-#endif  // TFLITE_SINGLE_ROUNDING
-
-template <DepthwiseConvOutputRounding output_rounding>
-struct DepthwiseConvBasicKernel {
-  static inline void Run(
-      const DepthwiseParams& params, const RuntimeShape& input_shape,
-      const uint8_t* input_data, const RuntimeShape& filter_shape,
-      const uint8_t* filter_data, const RuntimeShape& bias_shape,
-      const int32_t* bias_data, const RuntimeShape& output_shape,
-      uint8_t* output_data) {
-    const int stride_width = params.stride_width;
-    const int stride_height = params.stride_height;
-    const int dilation_width_factor = params.dilation_width_factor;
-    const int dilation_height_factor = params.dilation_height_factor;
-    const int pad_width = params.padding_values.width;
-    const int pad_height = params.padding_values.height;
-    const int depth_multiplier = params.depth_multiplier;
-    const int32_t output_activation_min = params.quantized_activation_min;
-    const int32_t output_activation_max = params.quantized_activation_max;
-    const int32_t input_offset = params.input_offset;
-    const int32_t filter_offset = params.weights_offset;
-    const int32_t output_offset = params.output_offset;
-    const int32_t output_multiplier = params.output_multiplier;
-    const int output_shift = params.output_shift;
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-    const int input_height = input_shape.Dims(1);
-    const int input_width = input_shape.Dims(2);
-    const int input_depth = input_shape.Dims(3);
-    const int filter_height = filter_shape.Dims(1);
-    const int filter_width = filter_shape.Dims(2);
-    const int output_height = output_shape.Dims(1);
-    const int output_width = output_shape.Dims(2);
-    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-    for (int b = 0; b < batches; ++b) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int ic = 0; ic < input_depth; ++ic) {
-            for (int m = 0; m < depth_multiplier; m++) {
-              const int oc = m + ic * depth_multiplier;
-              const int in_x_origin = (out_x * stride_width) - pad_width;
-              const int in_y_origin = (out_y * stride_height) - pad_height;
-              int32_t acc = 0;
-              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                  const int in_x =
-                      in_x_origin + dilation_width_factor * filter_x;
-                  const int in_y =
-                      in_y_origin + dilation_height_factor * filter_y;
-                  // If the location is outside the bounds of the input image,
-                  // use zero as a default value.
-                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                      (in_y < input_height)) {
-                    int32_t input_val =
-                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
-                    int32_t filter_val = filter_data[Offset(
-                        filter_shape, 0, filter_y, filter_x, oc)];
-                    acc += (filter_val + filter_offset) *
-                           (input_val + input_offset);
-                  }
-                }
-              }
-              if (bias_data) {
-                acc += bias_data[oc];
-              }
-              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
-                                                        output_shift);
-              acc += output_offset;
-              acc = std::max(acc, output_activation_min);
-              acc = std::min(acc, output_activation_max);
-              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
-                  static_cast<uint8_t>(acc);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  // TODO(b/148596273): Reconcile reference versions, perhaps with common
-  // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
-  static inline void RunPerChannel(
-      const DepthwiseParams& params, const RuntimeShape& input_shape,
-      const int8_t* input_data, const RuntimeShape& filter_shape,
-      const int8_t* filter_data, const RuntimeShape& bias_shape,
-      const int32_t* bias_data, const RuntimeShape& output_shape,
-      int8_t* output_data) {
-    // Get parameters.
-    // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
-    const int stride_width = params.stride_width;
-    const int stride_height = params.stride_height;
-    const int dilation_width_factor = params.dilation_width_factor;
-    const int dilation_height_factor = params.dilation_height_factor;
-    const int pad_width = params.padding_values.width;
-    const int pad_height = params.padding_values.height;
-    const int depth_multiplier = params.depth_multiplier;
-    const int32_t input_offset = params.input_offset;
-    const int32_t output_offset = params.output_offset;
-    const int32_t output_activation_min = params.quantized_activation_min;
-    const int32_t output_activation_max = params.quantized_activation_max;
-    const int32_t* output_multiplier = params.output_multiplier_per_channel;
-    const int32_t* output_shift = params.output_shift_per_channel;
-
-    // Check dimensions of the tensors.
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-    const int input_height = input_shape.Dims(1);
-    const int input_width = input_shape.Dims(2);
-    const int input_depth = input_shape.Dims(3);
-    const int filter_height = filter_shape.Dims(1);
-    const int filter_width = filter_shape.Dims(2);
-    const int output_height = output_shape.Dims(1);
-    const int output_width = output_shape.Dims(2);
-    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-    for (int batch = 0; batch < batches; ++batch) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-            for (int m = 0; m < depth_multiplier; ++m) {
-              const int output_channel = m + in_channel * depth_multiplier;
-              const int in_x_origin = (out_x * stride_width) - pad_width;
-              const int in_y_origin = (out_y * stride_height) - pad_height;
-              int32_t acc = 0;
-              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                  const int in_x =
-                      in_x_origin + dilation_width_factor * filter_x;
-                  const int in_y =
-                      in_y_origin + dilation_height_factor * filter_y;
-                  // Zero padding by omitting the areas outside the image.
-                  const bool is_point_inside_image =
-                      (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                      (in_y < input_height);
-                  if (is_point_inside_image) {
-                    int32_t input_val = input_data[Offset(
-                        input_shape, batch, in_y, in_x, in_channel)];
-                    int32_t filter_val = filter_data[Offset(
-                        filter_shape, 0, filter_y, filter_x, output_channel)];
-                    // Accumulate with 32 bits accumulator.
-                    // In the nudging process during model quantization, we
-                    // force the real value of 0.0 to be represented by a
-                    // quantized value. This guarantees that the input_offset
-                    // is an int8_t, even though it is represented using
-                    // int32_t. int32_t += int8_t * (int8_t - int8_t),
-                    // so the highest value we can get from
-                    // each accumulation is [-127, 127] * ([-128, 127] -
-                    // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                    // = 14.98, which means we can accumulate at least 2^16
-                    // multiplications without overflow. The accumulator is
-                    // applied to a filter so the accumulation logic will hold
-                    // as long as the filter size (filter_y * filter_x *
-                    // in_channel) does not exceed 2^16, which is the case in
-                    // all the models we have seen so far.
-                    acc += filter_val * (input_val + input_offset);
-                  }
-                }
-              }
-              if (bias_data) {
-                acc += bias_data[output_channel];
-              }
-              acc = DepthwiseConvRound<output_rounding>(
-                  acc, output_multiplier[output_channel],
-                  output_shift[output_channel]);
-              acc += output_offset;
-              acc = std::max(acc, output_activation_min);
-              acc = std::min(acc, output_activation_max);
-              output_data[Offset(output_shape, batch, out_y, out_x,
-                                 output_channel)] = static_cast<int8_t>(acc);
-            }
-          }
-        }
-      }
-    }
-  }
-};
-
-}  // namespace depthwise_conv
-
-inline void DepthwiseConv(
-    const DepthwiseParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    uint8_t* output_data) {
-  return depthwise_conv::DepthwiseConvBasicKernel<
-      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
-                                                       input_data, filter_shape,
-                                                       filter_data, bias_shape,
-                                                       bias_data, output_shape,
-                                                       output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
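
The two output-rounding categories above differ only in how exact halves are treated; a small standalone sketch of that difference on the value -1.5 (for example an accumulator of -3 rescaled by 1/2), independent of the removed helpers:

#include <cmath>
#include <cstdio>

int main() {
  const double exact = -1.5;
  // kUpward: halves are rounded towards +infinity (add 0.5, then floor).
  const long upward = static_cast<long>(std::floor(exact + 0.5));    // -1
  // kAwayFromZero: exact halves are rounded away from zero.
  const long away_from_zero = static_cast<long>(std::round(exact));  // -2
  std::printf("upward=%ld away_from_zero=%ld\n", upward, away_from_zero);
  return 0;
}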

+ 0 - 78
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h

@@ -1,78 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
-
-#include <limits.h>
-
-#include <vector>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Dequantizes into a float without rounding.
-template <typename InputT, typename OutputT>
-inline void Dequantize(const tflite::DequantizationParams& op_params,
-                       const RuntimeShape& input_shape,
-                       const InputT* input_data,
-                       const RuntimeShape& output_shape, OutputT* output_data) {
-  int32_t zero_point = op_params.zero_point;
-  const double scale = op_params.scale;
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    const int32_t val = input_data[i];
-    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
-    output_data[i] = result;
-  }
-}
-
-// Dequantizes per-channel quantized tensor to float.
-template <typename T>
-inline void PerChannelDequantize(
-    const tflite::PerChannelDequantizationParams& op_params,
-    const RuntimeShape& input_shape, const T* input_data,
-    const RuntimeShape& output_shape, float* output_data) {
-  // Ensure flat size is same.
-  MatchingFlatSize(input_shape, output_shape);
-
-  const int32_t* zero_point = op_params.zero_point;
-  const float* scale = op_params.scale;
-  const int32_t quantized_dimension = op_params.quantized_dimension;
-  const int32_t num_dims = input_shape.DimensionsCount();
-  const int32_t* dims_data = input_shape.DimsData();
-  std::vector<int> current_dim(num_dims, 0);
-
-  do {
-    size_t offset =
-        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
-                            current_dim.data(), 0, nullptr);
-    const int channel = current_dim[quantized_dimension];
-    const int32_t val = input_data[offset];
-    const float result =
-        static_cast<float>(scale[channel] * (val - zero_point[channel]));
-    output_data[offset] = result;
-  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
-                     current_dim.data()));
-}
-
-}  // namespace reference_ops
-
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
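
The affine dequantization above is simply result = scale * (value - zero_point); a standalone numeric sketch with an assumed scale of 0.5 and zero point of -1:

#include <cstdint>
#include <cstdio>

int main() {
  const float scale = 0.5f;
  const int32_t zero_point = -1;
  const int8_t quantized[3] = {-1, 1, 5};
  for (int i = 0; i < 3; ++i) {
    const float real = scale * (quantized[i] - zero_point);
    std::printf("q=%d -> %g\n", quantized[i], real);  // 0, 1 and 3
  }
  return 0;
}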

+ 0 - 247
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h

@@ -1,247 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // The input offset is the negative of the input zero point. Activation
-  // tensors are asymmetrically quantized, so they span the full int8 range.
-  constexpr int32_t max_value =
-      static_cast<int32_t>(std::numeric_limits<T>::max());
-  TFLITE_DCHECK_GE(params.input1_offset, -max_value);
-  TFLITE_DCHECK_LE(params.input1_offset, max_value);
-  TFLITE_DCHECK_GE(params.input2_offset, -max_value);
-  TFLITE_DCHECK_LE(params.input2_offset, max_value);
-  TFLITE_DCHECK_GE(params.output_offset, -max_value);
-  TFLITE_DCHECK_LE(params.output_offset, max_value);
-}
-
-// Element-wise div that can often be used for inner loop of broadcast Div as
-// well as the non-broadcast Div.
-template <typename T>
-inline void DivElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  DivCheckArithmeticParams<T>(params);
-
-  for (int i = 0; i < size; ++i) {
-    int32_t input1_val = params.input1_offset + input1_data[i];
-    int32_t input2_val = params.input2_offset + input2_data[i];
-    TFLITE_DCHECK_NE(input2_val, 0);
-    if (input2_val < 0) {
-      // Invert signs to avoid a negative input2_val as input2_inv needs to be
-      // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
-      input1_val = -input1_val;
-      input2_val = -input2_val;
-    }
-    int recip_shift;
-    const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
-    const int headroom = CountLeadingSignBits(input1_val);
-    const int32_t unscaled_quotient =
-        MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
-                                                    headroom);
-    const int total_shift = params.output_shift - recip_shift - headroom;
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            unscaled_quotient, params.output_multiplier, total_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-inline void Div(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  DivElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void Div(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int8_t* input1_data,
-                const RuntimeShape& input2_shape, const int8_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  DivElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-template <typename T, int N = 5>
-inline void BroadcastDivSlowQuantized(
-    const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
-    const T* input1_data, const RuntimeShape& unextended_input2_shape,
-    const T* input2_data, const RuntimeShape& unextended_output_shape,
-    T* output_data) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                 &output_desc);
-
-  DivCheckArithmeticParams<T>(params);
-
-  auto div_func = [&](int indexes[N]) {
-    int32_t input1_val =
-        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    int32_t input2_val =
-        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
-    TFLITE_DCHECK_NE(input2_val, 0);
-    if (input2_val < 0) {
-      // Invert signs to avoid a negative input2_val as input2_inv needs to be
-      // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
-      input1_val = -input1_val;
-      input2_val = -input2_val;
-    }
-    int recip_shift;
-    const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
-    const int headroom = CountLeadingSignBits(input1_val);
-    const int32_t unscaled_quotient =
-        MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
-                                                    headroom);
-    const int total_shift = params.output_shift - recip_shift - headroom;
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            unscaled_quotient, params.output_multiplier, total_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        static_cast<T>(clamped_output);
-  };
-  NDOpsHelper<N>(output_desc, div_func);
-}
-
-template <int N = 5>
-inline void BroadcastDivSlow(const ArithmeticParams& params,
-                             const RuntimeShape& unextended_input1_shape,
-                             const uint8_t* input1_data,
-                             const RuntimeShape& unextended_input2_shape,
-                             const uint8_t* input2_data,
-                             const RuntimeShape& unextended_output_shape,
-                             uint8_t* output_data) {
-  BroadcastDivSlowQuantized<uint8_t, N>(
-      params, unextended_input1_shape, input1_data, unextended_input2_shape,
-      input2_data, unextended_output_shape, output_data);
-}
-
-template <int N = 5>
-inline void BroadcastDivSlow(const ArithmeticParams& params,
-                             const RuntimeShape& unextended_input1_shape,
-                             const int8_t* input1_data,
-                             const RuntimeShape& unextended_input2_shape,
-                             const int8_t* input2_data,
-                             const RuntimeShape& unextended_output_shape,
-                             int8_t* output_data) {
-  BroadcastDivSlowQuantized<int8_t, N>(
-      params, unextended_input1_shape, input1_data, unextended_input2_shape,
-      input2_data, unextended_output_shape, output_data);
-}
-
-// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
-// dimensionality if the runtime code does a single loop over one dimension
-// that handles broadcasting as the base case. The code generator would then
-// generate max(D1, D2) nested for loops.
-template <typename T, int N = 5>
-void BroadcastDivSlow(const ArithmeticParams& params,
-                      const RuntimeShape& unextended_input1_shape,
-                      const T* input1_data,
-                      const RuntimeShape& unextended_input2_shape,
-                      const T* input2_data,
-                      const RuntimeShape& unextended_output_shape,
-                      T* output_data) {
-  T output_activation_min;
-  T output_activation_max;
-  GetActivationParams(params, &output_activation_min, &output_activation_max);
-
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                 &output_desc);
-
-  // In TensorFlow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest
-  // stride, typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has the smallest stride.
-
-  auto div_func = [&](int indexes[N]) {
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        ActivationFunctionWithMinMax(
-            input1_data[SubscriptToIndex(desc1, indexes)] /
-                input2_data[SubscriptToIndex(desc2, indexes)],
-            output_activation_min, output_activation_max);
-  };
-  NDOpsHelper<N>(output_desc, div_func);
-}
-
-template <typename T>
-inline void Div(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  T output_activation_min;
-  T output_activation_max;
-  GetActivationParams(params, &output_activation_min, &output_activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] / input2_data[i], output_activation_min,
-        output_activation_max);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
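
For the float path, Div reduces to an elementwise divide followed by the activation clamp; a minimal sketch with invented inputs and an assumed [0, 6] activation range:

#include <algorithm>
#include <cstdio>

int main() {
  const float in1[3] = {6.f, -4.f, 9.f};
  const float in2[3] = {2.f, 8.f, 3.f};
  const float act_min = 0.f, act_max = 6.f;
  float out[3];
  for (int i = 0; i < 3; ++i)
    out[i] = std::min(act_max, std::max(act_min, in1[i] / in2[i]));
  std::printf("%g %g %g\n", out[0], out[1], out[2]);  // 3 0 3
  return 0;
}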

+ 0 - 37
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h

@@ -1,37 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
-
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Elu(const RuntimeShape& input_shape, const float* input_data,
-                const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const float val = input_data[i];
-    output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
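
The ELU above is the identity for non-negative inputs and exp(x) - 1 (via expm1 for accuracy near zero) for negative ones; a short standalone check:

#include <cmath>
#include <cstdio>

int main() {
  const float xs[3] = {-1.f, 0.f, 2.f};
  for (float x : xs) {
    const float y = x < 0.f ? std::expm1(x) : x;
    std::printf("elu(%g) = %g\n", x, y);  // about -0.632, then 0, then 2
  }
  return 0;
}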

+ 0 - 38
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h

@@ -1,38 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
-
-#include <cmath>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-inline void Exp(const T* input_data, const size_t num_elements,
-                T* output_data) {
-  ruy::profiler::ScopeLabel label("Exp");
-  for (size_t idx = 0; idx < num_elements; ++idx) {
-    output_data[idx] = std::exp(input_data[idx]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_

+ 0 - 38
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h

@@ -1,38 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-void Fill(const RuntimeShape& value_shape, const T* value_data,
-          const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_EQ(value_shape.DimensionsCount(), 0);
-  const int flat_size = output_shape.FlatSize();
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = *value_data;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_

+ 0 - 39
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h

@@ -1,39 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Floor(const RuntimeShape& input_shape, const float* input_data,
-                  const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    int offset = i;
-    output_data[offset] = std::floor(input_data[offset]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_

+ 0 - 35
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h

@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
-
-#include <cmath>
-#include <functional>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-T FloorDiv(T input1, T input2) {
-  return std::floor(std::divides<double>()(static_cast<double>(input1),
-                                           static_cast<double>(input2)));
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
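
FloorDiv differs from C++'s built-in integer division, which truncates towards zero, whenever the operands have mixed signs; a quick standalone comparison:

#include <cmath>
#include <cstdio>

int main() {
  const int a = -7, b = 2;
  const int trunc_div = a / b;  // -3: C++ division truncates towards zero
  const int floor_div = static_cast<int>(
      std::floor(static_cast<double>(a) / static_cast<double>(b)));  // -4
  std::printf("trunc=%d floor=%d\n", trunc_div, floor_div);
  return 0;
}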

+ 0 - 44
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h

@@ -1,44 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
-
-#include <cmath>
-#include <functional>
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-T FloorMod(T input1, T input2) {
-  struct FloatMod {
-    float operator()(const float lhs, const float rhs) const {
-      return std::fmod(lhs, rhs);
-    }
-  };
-  using ModFunc = typename std::conditional<std::is_integral<T>::value,
-                                            std::modulus<T>, FloatMod>::type;
-  ModFunc mod_func;
-  T trunc_mod = mod_func(input1, input2);
-  return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0))
-             ? (trunc_mod + input2)
-             : trunc_mod;
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
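
The sign correction above converts C++'s truncated modulo into a floored modulo, so the result takes the sign of the divisor; a standalone check with -7 mod 2:

#include <cstdio>

int main() {
  const int input1 = -7, input2 = 2;
  const int trunc_mod = input1 % input2;  // -1 (truncated towards zero)
  const int floor_mod =
      (trunc_mod != 0 && (input2 < 0) != (trunc_mod < 0)) ? trunc_mod + input2
                                                          : trunc_mod;
  std::printf("trunc_mod=%d floor_mod=%d\n", trunc_mod, floor_mod);  // -1, 1
  return 0;
}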

+ 0 - 323
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h

@@ -1,323 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
-
-#include <algorithm>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& weights_shape,
-    const float* weights_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dims_count = output_shape.DimensionsCount();
-  const int weights_dims_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
-                                       output_shape, output_dims_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      float total = 0.f;
-      for (int d = 0; d < accum_depth; ++d) {
-        total += input_data[b * accum_depth + d] *
-                 weights_data[out_c * accum_depth + d];
-      }
-      float bias_value = 0.0f;
-      if (bias_data) {
-        bias_value = bias_data[out_c];
-      }
-      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
-          total + bias_value, output_activation_min, output_activation_max);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    uint8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(output_offset, 0);
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum = bias_data[out_c];
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; ++d) {
-        int16_t input_val = input_data[b * accum_depth + d] + input_offset;
-        int16_t filter_val =
-            filter_data[out_c * accum_depth + d] + filter_offset;
-        accum += filter_val * input_val;
-      }
-      // Down-scale the final int32_t accumulator to the scale used by our
-      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-      // multiplier and shift here have been pre-computed offline
-      // (e.g. by toco).
-      accum =
-          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
-      // Saturate, cast to int16_t, and store to output array.
-      accum = std::max(accum, output_activation_min - output_offset);
-      accum = std::min(accum, output_activation_max - output_offset);
-      accum += output_offset;
-      output_data[out_c + output_depth * b] = accum;
-    }
-  }
-}
-
-inline void ShuffledFullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& weights_shape,
-    const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
-  TFLITE_DCHECK((accum_depth % 16) == 0);
-  TFLITE_DCHECK((output_depth % 4) == 0);
-
-  // Shuffling and xoring of input activations into the workspace buffer
-  uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
-  if (batches == 1) {
-    for (int i = 0; i < accum_depth; i++) {
-      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
-    }
-  } else if (batches == 4) {
-    for (int c = 0; c < accum_depth; c += 16) {
-      for (int b = 0; b < 4; b++) {
-        const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
-        for (int j = 0; j < 16; j++) {
-          uint8_t src_val = *src_data_ptr++;
-          // Flip the sign bit, so that the kernel will only need to
-          // reinterpret these uint8_t values as int8_t, getting for free the
-          // subtraction of the zero_point value 128.
-          uint8_t dst_val = src_val ^ 0x80;
-          *shuffled_input_workspace_ptr++ = dst_val;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-
-  // Actual computation
-  if (batches == 1) {
-    int16_t* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8_t values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8_t* shuffled_weights_ptr =
-        reinterpret_cast<const int8_t*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8_t* shuffled_input_data =
-        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      // Internal accumulation.
-      // Accumulators start at zero; the bias is added after the loop.
-      int32_t accum[4] = {0};
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int j = 0; j < 16; j++) {
-            int8_t input_val = shuffled_input_data[d + j];
-            int8_t weights_val = *shuffled_weights_ptr++;
-            accum[i] += weights_val * input_val;
-          }
-        }
-      }
-      for (int i = 0; i < 4; i++) {
-        // Add bias value
-        int32_t acc = accum[i] + bias_data[c + i];
-        // Down-scale the final int32_t accumulator to the scale used by our
-        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-        // multiplier and shift here have been pre-computed offline
-        // (e.g. by toco).
-        acc =
-            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-        // Saturate, cast to int16_t, and store to output array.
-        acc = std::max(acc, output_activation_min);
-        acc = std::min(acc, output_activation_max);
-        output_ptr[c + i] = acc;
-      }
-    }
-  } else if (batches == 4) {
-    int16_t* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8_t values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8_t* shuffled_weights_ptr =
-        reinterpret_cast<const int8_t*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8_t* shuffled_input_data =
-        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      const int8_t* shuffled_input_ptr = shuffled_input_data;
-      // Internal accumulation.
-      // Initialize the accumulators to zero; the bias is added after the
-      // accumulation loop.
-      int32_t accum[4][4];
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          accum[i][b] = 0;
-        }
-      }
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int b = 0; b < 4; b++) {
-            for (int j = 0; j < 16; j++) {
-              int8_t input_val = shuffled_input_ptr[16 * b + j];
-              int8_t weights_val = shuffled_weights_ptr[16 * i + j];
-              accum[i][b] += weights_val * input_val;
-            }
-          }
-        }
-        shuffled_input_ptr += 64;
-        shuffled_weights_ptr += 64;
-      }
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          // Add bias value
-          int32_t acc = accum[i][b] + bias_data[c + i];
-          // Down-scale the final int32_t accumulator to the scale used by our
-          // (16-bit, typically 3 integer bits) fixed-point format. The
-          // quantized multiplier and shift here have been pre-computed offline
-          // (e.g. by toco).
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          // Saturate, cast to int16_t, and store to output array.
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_ptr[b * output_depth + c + i] = acc;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
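The kernel above repeatedly down-scales an int32_t accumulator with MultiplyByQuantizedMultiplier. As a minimal standalone sketch of that step, assuming the usual gemmlowp-style rounding-doubling definition (the helper names below are illustrative, not the library's own):

#include <cstdint>
#include <cstdio>
#include <limits>

// Saturating rounding-doubling high multiply: round((a * b) / 2^31), saturated.
inline int32_t SatRoundingDoublingHighMul(int32_t a, int32_t b) {
  const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
  const int64_t ab = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  const int32_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
  const int32_t result = static_cast<int32_t>((ab + nudge) / (1ll << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : result;
}

// Rounding arithmetic shift right by a non-negative exponent.
inline int32_t RoundingDivideByPOT(int32_t x, int exponent) {
  const int32_t mask = static_cast<int32_t>((1ll << exponent) - 1);
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
  return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Scales an int32 accumulator by multiplier * 2^shift (shift may be negative).
inline int32_t MultiplyByQuantizedMultiplierSketch(int32_t acc, int32_t multiplier,
                                                   int shift) {
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  return RoundingDivideByPOT(
      SatRoundingDoublingHighMul(acc * (1 << left_shift), multiplier), right_shift);
}

int main() {
  // Example values only: rescale acc = 20000 by roughly 0.6 * 2^-1.
  std::printf("%d\n", MultiplyByQuantizedMultiplierSketch(20000, 1288490189, -1));
  return 0;
}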

+ 0 - 145
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h

@@ -1,145 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void CheckArithmeticParams(const ArithmeticParams& params) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // The input offset is the negated input zero point. Activation tensors are
-  // asymmetrically quantized, so they span the full int8 range.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-  TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
-}
-
-inline void ElementWise(
-    int size, const ArithmeticParams& params, const int8_t* input1_data,
-    const int8_t* input2_data, int8_t* output_data,
-    void (*check_arithmetic_params)(const ArithmeticParams&),
-    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
-  CheckArithmeticParams(params);
-  for (int i = 0; i < size; ++i) {
-    output_data[i] = binary_func(input1_data[i], input2_data[i], params);
-  }
-}
-
-inline void BroadcastBinaryFunction4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const int8_t* input1_data, const RuntimeShape& input2_shape,
-    const int8_t* input2_data, const RuntimeShape& output_shape,
-    int8_t* output_data,
-    void (*check_arithmetic_params)(const ArithmeticParams&),
-    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func(
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)],
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)], params);
-        }
-      }
-    }
-  }
-}
-
-inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
-  const int32_t input1_val = params.input1_offset + x;
-  const int32_t input2_val = params.input2_offset + y;
-  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-  const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-  const int32_t scaled_input1_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input1_val, params.input1_multiplier, params.input1_shift);
-  const int32_t scaled_input2_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input2_val, params.input2_multiplier, params.input2_shift);
-  const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-  const int32_t raw_output =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          raw_sum, params.output_multiplier, params.output_shift) +
-      params.output_offset;
-  const int32_t clamped_output =
-      std::min(params.quantized_activation_max,
-               std::max(params.quantized_activation_min, raw_output));
-  return static_cast<int8_t>(clamped_output);
-}
-
-// Element-wise add that can often be used for the inner loop of a broadcast
-// add as well as for the non-broadcast add.
-inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const int8_t* input1_data, const int8_t* input2_data,
-                           int8_t* output_data) {
-  ElementWise(size, params, input1_data, input2_data, output_data,
-              CheckArithmeticParams, AddFunc);
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int8_t* input1_data,
-                const RuntimeShape& input2_shape, const int8_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  CheckArithmeticParams(params);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const int8_t* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const int8_t* input2_data,
-                               const RuntimeShape& output_shape,
-                               int8_t* output_data) {
-  BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape,
-                                input2_data, output_shape, output_data,
-                                CheckArithmeticParams, AddFunc);
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
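Seen in the float domain, AddFunc dequantizes both operands with their own scale and zero point, adds them, and requantizes into the output scale; the integer kernel approximates the division by the output scale with the pre-computed multiplier/shift pairs. A small float-domain reference sketch, with scales and zero points that are purely illustrative:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Float-domain reference for the int8 quantized add:
// dequantize both operands, add, then requantize into the output scale.
int8_t AddQuantizedReference(int8_t q1, float s1, int z1,
                             int8_t q2, float s2, int z2,
                             float s_out, int z_out) {
  const float sum = s1 * (q1 - z1) + s2 * (q2 - z2);
  const int q = static_cast<int>(std::round(sum / s_out)) + z_out;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

int main() {
  // Hypothetical quantization parameters chosen only for illustration.
  const int8_t out = AddQuantizedReference(40, 0.05f, 0, -20, 0.10f, 5, 0.08f, -3);
  std::printf("quantized sum: %d\n", out);
  return 0;
}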

+ 0 - 238
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h

@@ -1,238 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// Fixed-point per-channel-quantization convolution reference kernel.
-inline void ConvPerChannel(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  // Get parameters.
-  const int32_t input_offset = params.input_offset;  // r = s(q - Z)
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int32_t output_offset = params.output_offset;
-
-  // Set min and max value of the output.
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Consistency check.
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-
-  // Check dimensions of the tensors.
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                int32_t input_val =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                int32_t filter_val = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                // Accumulate with 32 bits accumulator.
-                // In the nudging process during model quantization, we force
-                // the real value 0.0 to be representable by a quantized value.
-                // This guarantees that input_offset fits in an int8_t, even
-                // though it is represented using int32_t. int32_t += int8_t *
-                // (int8_t - int8_t) so the highest value we can get from each
-                // accumulation is [-127, 127] * ([-128, 127] -
-                // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                // = 14.98, which means we can accumulate at least 2^16
-                // multiplications without overflow. The accumulator is
-                // applied to a filter so the accumulation logic will hold as
-                // long as the filter size (filter_y * filter_x * in_channel)
-                // does not exceed 2^16, which is the case in all the models
-                // we have seen so far.
-                // TODO(b/174275578): Add a check to make sure the
-                // accumulator depth is smaller than 2^16.
-                acc += filter_val * (input_val + input_offset);
-              }
-            }
-          }
-
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-// Fixed-point per-channel-quantization convolution reference kernel.
-// 16-bit data and 8-bit filter
-template <typename AccumScalar>
-inline void ConvPerChannel(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  // Get parameters.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-
-  // Set min and max value of the output.
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Consistency check.
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-
-  // Check dimensions of the tensors.
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          AccumScalar acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                int32_t input_val =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                int32_t filter_val = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                // Accumulate with a 64-bit accumulator.
-                // int64_t += int8_t * int16_t, so the largest magnitude a
-                // single accumulation can add is 127 * 32768 = 4161536,
-                // log2(4161536) = 21.99, leaving ample headroom.
-                acc += filter_val * input_val;
-              }
-            }
-          }
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          scaled_acc = std::max(scaled_acc, output_activation_min);
-          scaled_acc = std::min(scaled_acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int16_t>(scaled_acc);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
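The per-channel output_multiplier/output_shift arrays consumed above are derived offline from input_scale * filter_scale[channel] / output_scale. A sketch of that decomposition into a Q0.31 multiplier and a power-of-two shift, assuming the usual frexp-based approach; edge-case handling for extremely small scales is omitted here:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose a positive real scale into a Q0.31 fixed-point multiplier and a
// power-of-two shift, the form consumed per output channel by ConvPerChannel.
void QuantizeMultiplierSketch(double real_multiplier, int32_t* quantized_multiplier,
                              int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // real = q * 2^shift, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) {  // mantissa rounded up to 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main() {
  // Hypothetical per-channel scales: input_scale * filter_scale / output_scale.
  const double real = 0.5 * 0.02 / 0.1;
  int32_t mult = 0;
  int shift = 0;
  QuantizeMultiplierSketch(real, &mult, &shift);
  std::printf("multiplier=%d shift=%d\n", mult, shift);
  return 0;
}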

+ 0 - 291
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h

@@ -1,291 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-inline void DepthwiseConvPerChannel(
-    const DepthwiseParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  // Get parameters.
-  // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            int32_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  // Accumulate with 32 bits accumulator.
-                  // In the nudging process during model quantization, we force
-                  // the real value 0.0 to be representable by a quantized value.
-                  // This guarantees that input_offset fits in an int8_t, even
-                  // though it is represented using int32_t. int32_t += int8_t *
-                  // (int8_t - int8_t) so the highest value we can get from each
-                  // accumulation is [-127, 127] * ([-128, 127] -
-                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                  // = 14.98, which means we can accumulate at least 2^16
-                  // multiplications without overflow. The accumulator is
-                  // applied to a filter so the accumulation logic will hold as
-                  // long as the filter size (filter_y * filter_x * in_channel)
-                  // does not exceed 2^16, which is the case in all the models
-                  // we have seen so far.
-                  // TODO(b/174275578): Add a check to make sure the
-                  // accumulator depth is smaller than 2^16.
-                  acc += filter_val * (input_val + input_offset);
-                }
-              }
-            }
-            if (bias_data) {
-              acc += bias_data[output_channel];
-            }
-            acc = MultiplyByQuantizedMultiplier(
-                acc, output_multiplier[output_channel],
-                output_shift[output_channel]);
-            acc += output_offset;
-            acc = std::max(acc, output_activation_min);
-            acc = std::min(acc, output_activation_max);
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] = static_cast<int8_t>(acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void DepthwiseConvPerChannel(
-    const DepthwiseParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const std::int64_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  // Get parameters.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            std::int64_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  // Accumulate with a 64-bit accumulator.
-                  // We assume at most 2^16 accumulations, as in the 8-bit case,
-                  // so the value in the accumulator should not exceed 40 bits.
-                  acc += static_cast<int64_t>(filter_val) *
-                         static_cast<int64_t>(input_val);
-                }
-              }
-            }
-            if (bias_data) {
-              acc += bias_data[output_channel];
-            }
-            int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-                acc, output_multiplier[output_channel],
-                output_shift[output_channel]);
-            scaled_acc = std::max(scaled_acc, output_activation_min);
-            scaled_acc = std::min(scaled_acc, output_activation_max);
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] =
-                static_cast<int16_t>(scaled_acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void DepthwiseConvHybridPerChannel(
-    const DepthwiseParams& params, float* scaling_factors_ptr,
-    const RuntimeShape& input_shape, const int8_t* input_data,
-    const RuntimeShape& filter_shape, const int8_t* filter_data,
-    const RuntimeShape& bias_shape, const float* bias_data,
-    const RuntimeShape& output_shape, float* output_data,
-    const float* per_channel_scale, int32_t* input_offset) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int bias_depth = bias_shape.FlatSize();
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_depth, output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            int32_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  acc += filter_val * (input_val - input_offset[batch]);
-                }
-              }
-            }
-            float acc_float = static_cast<float>(acc);
-            acc_float *=
-                per_channel_scale[output_channel] * scaling_factors_ptr[batch];
-            if (bias_data && output_channel < bias_depth) {
-              acc_float += bias_data[output_channel];
-            }
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] =
-                ActivationFunctionWithMinMax(acc_float, output_activation_min,
-                                             output_activation_max);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
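A float reference for the accumulation pattern used by the depthwise kernels above: every input channel is convolved with depth_multiplier filters, and the result lands in output channel in_channel * depth_multiplier + m. Stride 1 and no padding are assumed to keep the sketch short; the layout mirrors the NHWC indexing above, and all shapes in main are made up for illustration:

#include <cstdio>
#include <vector>

// Single-batch NHWC depthwise convolution, float, stride 1, no padding.
// Filter layout is [f_h, f_w, out_d], matching Offset(filter_shape, 0, fy, fx, oc).
void DepthwiseConvFloat(const std::vector<float>& input, int in_h, int in_w, int in_d,
                        const std::vector<float>& filter, int f_h, int f_w,
                        int depth_multiplier, std::vector<float>& output) {
  const int out_h = in_h - f_h + 1;
  const int out_w = in_w - f_w + 1;
  const int out_d = in_d * depth_multiplier;
  output.assign(static_cast<size_t>(out_h) * out_w * out_d, 0.0f);
  for (int y = 0; y < out_h; ++y)
    for (int x = 0; x < out_w; ++x)
      for (int c = 0; c < in_d; ++c)
        for (int m = 0; m < depth_multiplier; ++m) {
          const int oc = c * depth_multiplier + m;
          float acc = 0.0f;
          for (int fy = 0; fy < f_h; ++fy)
            for (int fx = 0; fx < f_w; ++fx) {
              const float in_val = input[((y + fy) * in_w + (x + fx)) * in_d + c];
              const float w = filter[(fy * f_w + fx) * out_d + oc];
              acc += in_val * w;
            }
          output[(y * out_w + x) * out_d + oc] = acc;
        }
}

int main() {
  std::vector<float> in(3 * 3 * 2, 1.0f);    // 3x3 input, 2 channels
  std::vector<float> filt(2 * 2 * 4, 0.5f);  // 2x2 filters, depth_multiplier = 2
  std::vector<float> out;
  DepthwiseConvFloat(in, 3, 3, 2, filt, 2, 2, 2, out);
  std::printf("out[0] = %f\n", out[0]);      // 4 taps * 1.0 * 0.5 = 2.0
  return 0;
}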

+ 0 - 201
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h

@@ -1,201 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// For the per-channel functions, the quantization spec
-// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric)
-// defines weights as symmetric, so zero_point (params.weights_offset) is
-// always 0.
-// However, for the per-tensor functions, params.weights_offset is still
-// applied for backward compatibility.
-
-inline void FullyConnectedPerChannel(
-    const FullyConnectedParams& params, const int32_t* output_multiplier,
-    const int* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = output_shape.Dims(0);
-  const int output_depth = output_shape.Dims(1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += filter_val * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
-                                          output_shift[out_c]);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-template <typename AccumScalar>
-inline void FullyConnectedPerChannel(
-    const FullyConnectedParams& params, const int32_t* output_multiplier,
-    const int* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      AccumScalar acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += filter_val * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      int32_t acc_scaled = MultiplyByQuantizedMultiplier(
-          acc, output_multiplier[out_c], output_shift[out_c]);
-      acc_scaled = std::max(acc_scaled, output_activation_min);
-      acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-template <typename AccumScalar>
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      AccumScalar acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      int32_t acc_scaled =
-          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc_scaled = std::max(acc_scaled, output_activation_min);
-      acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
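A stripped-down view of the per-tensor int8 inner loop above, for a single output unit: accumulate (filter + filter_offset) * (input + input_offset) in int32, add the bias, then clamp. Purely for readability, the multiplier/shift requantization is replaced here by a plain float effective scale, and every offset, bias, and scale value in main is hypothetical:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One output unit of an int8 fully-connected layer, with a float rescale
// standing in for the fixed-point requantization step.
int8_t FullyConnectedOneOutput(const int8_t* input, const int8_t* filter,
                               int accum_depth, int32_t input_offset,
                               int32_t filter_offset, int32_t bias,
                               float effective_scale, int32_t output_offset) {
  int32_t acc = 0;
  for (int d = 0; d < accum_depth; ++d) {
    acc += (filter[d] + filter_offset) * (input[d] + input_offset);
  }
  acc += bias;
  const int32_t out = static_cast<int32_t>(acc * effective_scale) + output_offset;
  return static_cast<int8_t>(std::min(127, std::max(-128, out)));
}

int main() {
  const int8_t in[4] = {10, -5, 3, 0};
  const int8_t w[4] = {2, 4, -1, 7};
  // Offsets, bias, and scale below are illustrative only.
  std::printf("%d\n", FullyConnectedOneOutput(in, w, 4, 5, 0, 16, 0.01f, -1));
  return 0;
}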

+ 0 - 67
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h

@@ -1,67 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
-                            int32_t depth, const int8_t* input_data,
-                            int8_t* output_data) {
-  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  // The output scale must be in sync with Prepare().
-  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
-  // to [-1, 127/128].
-  static constexpr int32_t kOutputScale = 7;
-  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
-    // int32_t = (int8_t - int8_t) ^ 2.
-    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
-    // safe from overflowing in at least 2^16 steps.
-    int32_t acc = 0;
-    for (int inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input =
-          input_data[depth * outer_index + inner_index] - input_zero_point;
-      acc += input * input;
-    }
-    int32_t inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
-                                     &inv_l2norm_shift);
-
-    for (int inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input =
-          input_data[depth * outer_index + inner_index] - input_zero_point;
-
-      // Rescale and downcast. Rescale is folded into the division.
-      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
-          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
-      output_in_q24 =
-          std::min(static_cast<int32_t>(kMaxInt8),
-                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
-      output_data[depth * outer_index + inner_index] =
-          static_cast<int8_t>(output_in_q24);
-    }
-  }
-}
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
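The float computation that the removed int8 L2Normalization approximates: each inner vector is divided by its L2 norm, with the quantized kernel replacing 1/sqrt(acc) by GetInvSqrtQuantizedMultiplierExp and emitting outputs in 1/128 steps. A float reference sketch; the small epsilon guard is an addition here, not part of the kernel above:

#include <cmath>
#include <cstdio>

// Divide each inner vector of length `depth` by its L2 norm.
void L2NormalizeFloat(const float* input, int outer_size, int depth, float* output) {
  for (int i = 0; i < outer_size; ++i) {
    float sum_sq = 0.0f;
    for (int d = 0; d < depth; ++d) {
      sum_sq += input[i * depth + d] * input[i * depth + d];
    }
    const float inv_norm = 1.0f / std::sqrt(sum_sq + 1e-6f);  // epsilon for safety
    for (int d = 0; d < depth; ++d) {
      output[i * depth + d] = input[i * depth + d] * inv_norm;
    }
  }
}

int main() {
  const float in[4] = {3.0f, 4.0f, 0.0f, 0.0f};
  float out[4];
  L2NormalizeFloat(in, 1, 4, out);
  std::printf("%f %f\n", out[0], out[1]);  // approximately 0.6 and 0.8
  return 0;
}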

+ 0 - 121
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h

@@ -1,121 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
-                     int32_t input_multiplier, int32_t input_left_shift,
-                     int32_t input_size, const int8_t* input_data,
-                     int8_t* output_data) {
-  // Integer bits must be in sync with Prepare() function.
-  static constexpr int32_t kInputIntegerBits = 4;
-  static constexpr int32_t kOutputIntegerBits = 8;
-  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  static constexpr int32_t kOutputZeroPoint = -128;
-
-  for (int i = 0; i < input_size; ++i) {
-    const int32_t input =
-        static_cast<int32_t>(input_data[i]) - input_zero_point;
-    if (input <= -input_range_radius) {
-      output_data[i] = kMinInt8;
-    } else if (input >= input_range_radius) {
-      output_data[i] = kMaxInt8;
-    } else {
-      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
-          input, input_multiplier, input_left_shift);
-      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-      const int32_t output_in_q0 =
-          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
-
-      // Rescale and downcast.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_in_q23 =
-          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
-      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
-                                        static_cast<int32_t>(kMinInt8)),
-                               static_cast<int32_t>(kMaxInt8));
-      output_data[i] = static_cast<int8_t>(output_in_q23);
-    }
-  }
-}
-
-inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
-                     int32_t input_size, const int16_t* ptr_input_data,
-                     int16_t* ptr_output_data) {
-  // We use the sigmoid LUT and take into account that
-  // tanh(x) = 2*sigmoid(2*x) - 1.
-
-  // We scale by 3/4 to expand the range [-8, 8] -> [-10.7, 10.7].
-  // For a general parameter scale, the multiplier 3 is taken into account in
-  // the TanhPrepare function and is already included in input_multiplier.
-
-  TFLITE_DCHECK_GE(input_left_shift, 0);
-  if (input_multiplier == 0) {  // power of two case
-    input_multiplier = 3 << input_left_shift;
-    input_left_shift = 0;
-  }
-
-  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
-
-  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data =
-        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
-
-    // We do interpolation on unsigned values.
-    uint32_t abs_input_data = abs(input_data);
-
-    // We divide by 2^9 because we need to divide by 2^7 for the input
-    // conversion, plus another factor of 1/4 from the scale above.
-
-    // Define uh as a uint32_t so that this computation does not overflow.
-    uint32_t uh = abs_input_data >> 9;
-    uint32_t result;
-
-    if (uh >= 255) {
-      // Saturate to maximum.
-      result = 0x7FFF << 10;
-    } else {
-      uint32_t ua = sigmoid_table_uint16[uh];
-      uint32_t ub = sigmoid_table_uint16[uh + 1];
-      uint32_t ut = abs_input_data & 0x1ff;
-      // Interpolation is done using the fractional bit.
-      result = (ua << 9) + ut * (ub - ua);
-    }
-
-    result = (input_data >= 0) ? (result + (1 << 9))
-                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);
-
-    // Back to 16-bit.
-    result >>= 10;
-
-    *ptr_output_data = result;
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
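An illustration of the LUT-with-interpolation scheme used by the 16-bit Logistic above: a table of sigmoid samples indexed by the high bits of the input, with the low 9 bits used as the interpolation fraction. The table here is computed at startup rather than taken from the library's sigmoid_table_uint16, and only the lookup plus interpolation is shown; the sign handling and the final >> 10 rescale from the kernel above are left out, so treat the exact scaling as an assumption:

#include <cmath>
#include <cstdint>
#include <cstdio>

uint16_t table[257];

// Fill the table with sigmoid samples over an assumed input range [0, 10.7].
void BuildTable() {
  for (int i = 0; i < 257; ++i) {
    const double x = 10.7 * i / 256.0;
    table[i] = static_cast<uint16_t>(std::round(65535.0 / (1.0 + std::exp(-x))));
  }
}

// Look up the high bits, interpolate with the 9-bit fraction; the result is
// the table value scaled by 2^9.
uint32_t InterpolatedSigmoid(uint32_t abs_input) {
  const uint32_t uh = abs_input >> 9;     // table index
  const uint32_t ut = abs_input & 0x1ff;  // 9-bit fraction
  if (uh >= 256) return static_cast<uint32_t>(table[256]) << 9;
  const uint32_t ua = table[uh];
  const uint32_t ub = table[uh + 1];
  return (ua << 9) + ut * (ub - ua);
}

int main() {
  BuildTable();
  std::printf("%u\n", InterpolatedSigmoid(1000));
  return 0;
}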

+ 0 - 79
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h

@@ -1,79 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-template <typename integer_type>
-inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
-                 int32_t shift, const RuntimeShape& unextended_input_shape,
-                 const integer_type* input_data, int32_t input_zero_point,
-                 const RuntimeShape& unextended_output_shape,
-                 integer_type* output_data, int32_t output_zero_point) {
-  // The current implementation only supports rank-4 inputs and simultaneous
-  // reduction over width and height.
-  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-  const int output_batch = output_shape.Dims(0);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int output_depth = output_shape.Dims(3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int num_elements_in_axis = input_width * input_height;
-
-  TFLITE_CHECK_EQ(op_params.axis_count, 2);
-  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-  TFLITE_CHECK_EQ(output_height, 1);
-  TFLITE_CHECK_EQ(output_width, 1);
-
-  static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
-  static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();
-
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_d = 0; out_d < output_depth; ++out_d) {
-      int32_t acc = 0;
-      for (int in_h = 0; in_h < input_height; ++in_h) {
-        for (int in_w = 0; in_w < input_width; ++in_w) {
-          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
-                 input_zero_point;
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
-                    : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
-      acc += output_zero_point;
-      acc = std::min(std::max(acc, kMinInt), kMaxInt);
-      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-          static_cast<integer_type>(acc);
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
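
Note: the removed Mean kernel rounds its integer division to the nearest value by biasing the accumulator with half the divisor before dividing; the bias takes the sign of the accumulator so that negative averages round the same way as positive ones. A small self-contained illustration of just that rounding rule:

    #include <cstdio>

    // Round-to-nearest integer division as used by the removed Mean kernel.
    int RoundedDiv(int acc, int n) {
      return acc > 0 ? (acc + n / 2) / n : (acc - n / 2) / n;
    }

    int main() {
      std::printf("%d %d\n", RoundedDiv(7, 4), RoundedDiv(-7, 4));  // prints 2 -2
      return 0;
    }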

+ 0 - 133
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h

@@ -1,133 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
-
-#include <algorithm>
-
-#include "fixedpoint/fixedpoint.h"
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-template <typename T>
-inline void MulElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                      params.output_multiplier,
-                                      params.output_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-template <typename T>
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  ruy::profiler::ScopeLabel label("Mul/8bit");
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-// Mul with 16 bit inputs and int8_t outputs.
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int16_t* input1_data,
-                const RuntimeShape& input2_shape, const int16_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  ruy::profiler::ScopeLabel label("Mul/Int16Int8");
-  int32_t output_offset = params.output_offset;
-  int32_t output_activation_min = params.quantized_activation_min;
-  int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-
-    F0 unclamped_result =
-        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
-    int16_t rescaled_result =
-        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
-    int16_t clamped_result = std::min<int16_t>(
-        output_activation_max - output_offset, rescaled_result);
-    clamped_result = std::max<int16_t>(output_activation_min - output_offset,
-                                       clamped_result);
-    output_data[i] = output_offset + clamped_result;
-  }
-}
-
-template <typename T>
-inline void BroadcastMul4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  // The input shapes are extended as part of NdArrayDesc initialization.
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t unclamped_result =
-              params.output_offset +
-              MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                            params.output_multiplier,
-                                            params.output_shift);
-          const int32_t clamped_output = std::min(
-              params.quantized_activation_max,
-              std::max(params.quantized_activation_min, unclamped_result));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
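
Note: the removed MulElementwise follows the usual quantized-multiply shape visible in the code above: add the input offsets, form the raw 32-bit product, rescale it to the output scale, add the output offset, then clamp to the activation range. The sketch below uses hypothetical offsets and replaces MultiplyByQuantizedMultiplier with a plain right shift, purely to show the order of operations.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical quantization parameters, for illustration only.
      const int32_t input1_offset = 10, input2_offset = -5, output_offset = 3;
      const int32_t act_min = -128, act_max = 127;
      const int8_t q1 = 40, q2 = 20;
      const int32_t raw = (q1 + input1_offset) * (q2 + input2_offset);  // 50 * 15
      const int32_t rescaled = raw >> 4;  // stand-in for the real requantization
      const int32_t out = std::min(act_max,
                                   std::max(act_min, output_offset + rescaled));
      std::printf("%d\n", out);  // prints 49
      return 0;
    }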

+ 0 - 264
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h

@@ -1,264 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const int8_t* input_data,
-                        const RuntimeShape& output_shape, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          // Round to the closest integer value.
-          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
-                        : (acc - filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const int8_t* input_data, const RuntimeShape& output_shape,
-                    int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min,
-                   std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(params.quantized_activation_max,
-                   std::numeric_limits<int8_t>::max());
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int8_t max = std::numeric_limits<int8_t>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<int8_t>(max, params.quantized_activation_min);
-          max = std::min<int8_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int8_t>(max);
-        }
-      }
-    }
-  }
-}
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const int16_t* input_data,
-                        const RuntimeShape& output_shape,
-                        int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          // Round to the closest integer value.
-          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
-                        : (acc - filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int16_t>(acc);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const int16_t* input_data, const RuntimeShape& output_shape,
-                    int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min,
-                   std::numeric_limits<int16_t>::min());
-  TFLITE_DCHECK_LE(params.quantized_activation_max,
-                   std::numeric_limits<int16_t>::max());
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int16_t max = std::numeric_limits<int16_t>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<int16_t>(max, params.quantized_activation_min);
-          max = std::min<int16_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int16_t>(max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
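
Note: both removed pooling kernels clamp the filter window against the input borders before accumulating, so only taps that land inside the input contribute (and, for AveragePool, only those taps are counted in the divisor). A tiny sketch of that boundary computation with hypothetical sizes:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int input_width = 8, filter_width = 3, stride = 2, pad = 1;
      const int out_x = 0;                                   // leftmost column
      const int in_x_origin = out_x * stride - pad;          // -1
      const int start = std::max(0, -in_x_origin);           //  1
      const int end = std::min(filter_width, input_width - in_x_origin);  // 3
      std::printf("filter taps [%d, %d) fall inside the input\n", start, end);
      return 0;
    }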

+ 0 - 117
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h

@@ -1,117 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
-                 int32_t input_multiplier, int32_t input_shift,
-                 const RuntimeShape& input_shape, const int8_t* input_data,
-                 const RuntimeShape& output_shape, int8_t* output_data) {
-  // Integer bits must be in sync with Prepare() function.
-  static constexpr int32_t kInputIntegerBits = 4;
-  static constexpr int32_t kOutputScale = 7;
-  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    const int32_t input =
-        static_cast<int32_t>(input_data[i]) - input_zero_point;
-    if (input <= -input_range_radius) {
-      output_data[i] = kMinInt8;
-    } else if (input >= input_range_radius) {
-      output_data[i] = kMaxInt8;
-    } else {
-      const int32_t input_in_q4 =
-          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
-      const int32_t output_in_q0 =
-          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
-
-      // Rescale and downcast.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_in_q24 =
-          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
-      output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
-      output_data[i] = static_cast<int8_t>(output_in_q24);
-    }
-  }
-}
-
-inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
-                 const RuntimeShape& input_shape, const int16_t* ptr_input_data,
-                 const RuntimeShape& output_shape, int16_t* ptr_output_data) {
-  // We reuse the sigmoid LUT, taking into account that
-  // tanh(x) = 2*sigmoid(2*x) - 1.
-
-  // We scale by 3/4 to expand the range [-8,8] to [-10.7,10.7].
-  // For a general parameter scale, the multiplier 3 is taken into account
-  // in the TanhPrepare function and is already included in
-  // input_multiplier.
-
-  if (input_multiplier == 0) {  // power of two case
-    input_multiplier = 3 << input_left_shift;
-    input_left_shift = 0;
-  }
-
-  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
-
-  int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data =
-        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
-
-    uint32_t abs_input_data = abs(input_data);
-    uint32_t uh = abs_input_data >> 8;
-    int32_t result;
-
-    if (uh >= 255) {
-      // Saturate to maximum.
-      result = 0xFFFF << 8;
-    } else {
-      uint32_t ua = sigmoid_table_uint16[uh];
-      uint32_t ub = sigmoid_table_uint16[uh + 1];
-
-      uint8_t ut = abs_input_data & 0xFF;
-
-      result = (ua << 8) + ut * (ub - ua);
-    }
-
-    result = (input_data >= 0)
-                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
-                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
-
-    // Convert back to 16-bit.
-    result >>= (9 - 1);
-
-    *ptr_output_data = result;
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
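
Note: the removed int16 Tanh reuses the sigmoid lookup table via the identity tanh(x) = 2*sigmoid(2*x) - 1. A quick floating-point check of that identity, independent of the fixed-point details above:

    #include <cmath>
    #include <cstdio>

    int main() {
      for (double x : {-2.0, -0.5, 0.0, 1.5}) {
        const double via_sigmoid = 2.0 / (1.0 + std::exp(-2.0 * x)) - 1.0;
        std::printf("x=% .1f  tanh=% .6f  2*sigmoid(2x)-1=% .6f\n",
                    x, std::tanh(x), via_sigmoid);
      }
      return 0;
    }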

+ 0 - 224
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h

@@ -1,224 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// Fixed-point per-channel-quantization transpose convolution reference kernel.
-inline void TransposeConv(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    int32_t* scratch_buffer) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
-  const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int num_elements = output_shape.FlatSize();
-  // We need to initialize scratch_buffer to all 0s, as we apply the same
-  // 'scatter'-based trick as in the float version.
-  memset(scratch_buffer, 0, num_elements * sizeof(int32_t));
-
-  // Loop through input elements one at a time.
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int in_y = 0; in_y < input_height; ++in_y) {
-      for (int in_x = 0; in_x < input_width; ++in_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          // Loop through the output elements it will influence.
-          const int out_x_origin = (in_x * stride_width) - pad_width;
-          const int out_y_origin = (in_y * stride_height) - pad_height;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int out_channel = 0; out_channel < output_depth;
-                   ++out_channel) {
-                // Compute output element location.
-                const int out_x = out_x_origin + filter_x;
-                const int out_y = out_y_origin + filter_y;
-                // We cannot accumulate out of bounds.
-                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
-                    (out_y < output_height)) {
-                  const int8_t input_value = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  const int8_t filter_value =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                        out_channel)] +=
-                      (input_value + input_offset) * filter_value;
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                              out_channel)];
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator
-template <typename Scalar>
-inline void TransposeConv(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const Scalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    Scalar* scratch_buffer) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
-  const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int num_elements = output_shape.FlatSize();
-  // We need to initialize scratch_buffer to all 0s, as we apply the same
-  // 'scatter'-based trick as in the float version.
-  memset(scratch_buffer, 0, num_elements * sizeof(Scalar));
-
-  // Loop through input elements one at a time.
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int in_y = 0; in_y < input_height; ++in_y) {
-      for (int in_x = 0; in_x < input_width; ++in_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          // Loop through the output elements it will influence.
-          const int out_x_origin = (in_x * stride_width) - pad_width;
-          const int out_y_origin = (in_y * stride_height) - pad_height;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int out_channel = 0; out_channel < output_depth;
-                   ++out_channel) {
-                // Compute output element location.
-                const int out_x = out_x_origin + filter_x;
-                const int out_y = out_y_origin + filter_y;
-                // We cannot accumulate out of bounds.
-                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
-                    (out_y < output_height)) {
-                  const int32_t input_value = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  const int32_t filter_value =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                        out_channel)] +=
-                      input_value * filter_value;
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                             out_channel)];
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          scaled_acc = std::max(scaled_acc, output_activation_min);
-          scaled_acc = std::min(scaled_acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int16_t>(scaled_acc);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
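
Note: both removed TransposeConv variants use a "scatter" accumulation - every input element adds input*filter into each output position it influences, collected in a zero-initialized scratch buffer, with bias addition and requantization applied in a second pass. A 1-D toy version of the scatter pass (made-up sizes, no quantization):

    #include <cstdio>

    int main() {
      const int input[3] = {1, 2, 3};
      const int filter[2] = {10, 1};
      const int stride = 2;
      int scratch[6] = {0};  // output length = (3 - 1) * stride + 2 = 6
      for (int in_x = 0; in_x < 3; ++in_x) {
        for (int k = 0; k < 2; ++k) {
          scratch[in_x * stride + k] += input[in_x] * filter[k];
        }
      }
      for (int v : scratch) std::printf("%d ", v);  // prints 10 1 20 2 30 3
      std::printf("\n");
      return 0;
    }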

+ 0 - 90
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h

@@ -1,90 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
-
-#include <algorithm>
-#include <cmath>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const float* input_data,
-                            const RuntimeShape& output_shape,
-                            float* output_data, float epsilon = 1e-6) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  for (int i = 0; i < outer_size; ++i) {
-    float squared_l2_norm = 0;
-    for (int c = 0; c < depth; ++c) {
-      const float val = input_data[depth * i + c];
-      squared_l2_norm += val * val;
-    }
-    float l2_norm = std::sqrt(squared_l2_norm);
-    l2_norm = std::max(l2_norm, epsilon);
-    for (int c = 0; c < depth; ++c) {
-      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
-    }
-  }
-}
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const uint8_t* input_data,
-                            const RuntimeShape& output_shape,
-                            uint8_t* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int32_t input_zero_point = op_params.input_zero_point;
-
-  for (int i = 0; i < outer_size; ++i) {
-    int32_t square_l2_norm = 0;
-    for (int c = 0; c < depth; c++) {
-      int32_t diff = input_data[depth * i + c] - input_zero_point;
-      square_l2_norm += diff * diff;
-    }
-    int32_t inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
-                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
-    for (int c = 0; c < depth; c++) {
-      int32_t diff = input_data[depth * i + c] - input_zero_point;
-      int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
-      int32_t unclamped_output_val = 128 + rescaled_diff;
-      int32_t output_val =
-          std::min(static_cast<int32_t>(255),
-                   std::max(static_cast<int32_t>(0), unclamped_output_val));
-      output_data[depth * i + c] = static_cast<uint8_t>(output_val);
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
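
Note: the removed float L2Normalization divides each element of the innermost dimension by max(sqrt(sum of squares), epsilon); the uint8 variant does the same in fixed point via an inverse-square-root multiplier. Minimal float illustration:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      const float x[3] = {3.f, 0.f, 4.f};
      float sq = 0.f;
      for (float v : x) sq += v * v;
      const float norm = std::max(std::sqrt(sq), 1e-6f);   // 5
      for (float v : x) std::printf("%.2f ", v / norm);    // prints 0.60 0.00 0.80
      std::printf("\n");
      return 0;
    }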

+ 0 - 69
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h

@@ -1,69 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void LeakyRelu(const tflite::LeakyReluParams& params,
-                      const RuntimeShape& input_shape, const float* input_data,
-                      const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const float val = input_data[i];
-    // Note that alpha might be > 1 or < 0, so we don't use std::max here.
-    output_data[i] = val > 0 ? val : val * params.alpha;
-  }
-}
-
-template <typename T>
-inline void QuantizeLeakyRelu(const LeakyReluParams& params,
-                              const RuntimeShape& input_shape,
-                              const T* input_data,
-                              const RuntimeShape& output_shape,
-                              T* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  static const int32_t quantized_min = std::numeric_limits<T>::min();
-  static const int32_t quantized_max = std::numeric_limits<T>::max();
-  for (int i = 0; i < flat_size; ++i) {
-    const int32_t input_value = input_data[i] - params.input_offset;
-    int32_t unclamped_output;
-    if (input_value >= 0) {
-      unclamped_output = params.output_offset +
-                         MultiplyByQuantizedMultiplier(
-                             input_value, params.output_multiplier_identity,
-                             params.output_shift_identity);
-    } else {
-      unclamped_output = params.output_offset +
-                         MultiplyByQuantizedMultiplier(
-                             input_value, params.output_multiplier_alpha,
-                             params.output_shift_alpha);
-    }
-    const T clamped_output =
-        std::min(quantized_max, std::max(quantized_min, unclamped_output));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
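
Note: the removed float LeakyRelu computes f(x) = x for x > 0 and alpha*x otherwise; because alpha may be negative or greater than 1, the kernel cannot simply take std::max(x, alpha*x). A short sketch:

    #include <cstdio>

    float LeakyRelu(float x, float alpha) { return x > 0 ? x : x * alpha; }

    int main() {
      std::printf("%.2f %.2f\n", LeakyRelu(2.0f, 0.1f), LeakyRelu(-3.0f, 0.1f));
      // prints 2.00 -0.30
      return 0;
    }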

+ 0 - 256
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h

@@ -1,256 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
-
-#include <algorithm>
-#include <cstddef>
-#include <limits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void LogSoftmax(const SoftmaxParams& params,
-                       const RuntimeShape& input_shape, const float* input_data,
-                       const RuntimeShape& output_shape, float* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    // Find max element value which we'll use to ensure numerical stability
-    // taking advantage of the following equality:
-    // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
-    float max = std::numeric_limits<float>::lowest();
-    for (int c = 0; c < depth; ++c) {
-      max = std::max(max, input_data[i * depth + c]);
-    }
-
-    // Compute sum.
-    float sum = 0.f;
-    for (int c = 0; c < depth; ++c) {
-      sum += std::exp(input_data[i * depth + c] - max);
-    }
-
-    // Compute result.
-    const float log_sum = std::log(sum);
-    for (int c = 0; c < depth; ++c) {
-      output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
-    }
-  }
-}
-
-inline void LogSoftmax(const SoftmaxParams& params,
-                       const RuntimeShape& input_shape,
-                       const uint8_t* input_data,
-                       const RuntimeShape& output_shape, uint8_t* output_data) {
-  const int32_t input_multiplier = params.input_multiplier;
-  const int32_t input_left_shift = params.input_left_shift;
-  const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
-  const int32_t reverse_scaling_right_shift =
-      params.reverse_scaling_right_shift;
-  const int diff_min = params.diff_min;
-  // The representation chosen for the input to the exp() function is Q5.26.
-  // We need to leave extra space since values that we skip might be as large
-  // as -32 before multiplying by input_beta_multiplier, and therefore as
-  // large as -16 afterwards.  Note that exp(-8) is definitely not
-  // insignificant to accumulation, but exp(-16) definitely is.
-  static constexpr int kScaledDiffIntegerBits = 5;
-  static constexpr int kAccumulationIntegerBits = 12;
-  static constexpr int kOutputIntegerBits = 4;
-  using FixedPointScaledDiff =
-      gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
-  using FixedPointAccum =
-      gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
-
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    uint8_t max_in_row = 0;
-    for (int c = 0; c < depth; ++c) {
-      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
-    }
-
-    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff >= diff_min) {
-        const int32_t input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_multiplier, input_left_shift);
-        const FixedPointScaledDiff scaled_diff_f8 =
-            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
-                                        exp_on_negative_values(scaled_diff_f8));
-      }
-    }
-
-    const int32_t fixed_log_sum_of_exps =
-        log_x_for_x_greater_than_or_equal_to_1<kScaledDiffIntegerBits>(
-            sum_of_exps)
-            .raw();
-
-    // rescaled_diff_min is smallest representable in
-    // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the
-    // log-sub-exps that will be subtracted in the loop.
-    //
-    // The thresholds diff_min, etc are negative.
-    const int rescaled_diff_min =
-        fixed_log_sum_of_exps + std::numeric_limits<int32_t>::lowest();
-    const int adjusted_diff_min =
-        std::max(static_cast<int32_t>(
-                     diff_min - 1),  // Note use of > below instead of >= above.
-                 MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                     rescaled_diff_min, reverse_scaling_divisor,
-                     -reverse_scaling_right_shift));
-
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff > adjusted_diff_min) {
-        const int32_t input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_multiplier, input_left_shift);
-        int32_t unsat_output =
-            gemmlowp::RoundingDivideByPOT(
-                (input_diff_rescaled - fixed_log_sum_of_exps),
-                31 - kScaledDiffIntegerBits - kOutputIntegerBits) +
-            255;
-
-        output_data[i * depth + c] = static_cast<uint8_t>(
-            std::max(std::min(unsat_output, static_cast<int32_t>(255)),
-                     static_cast<int32_t>(0)));
-      } else {
-        // Set output to smallest value.
-        output_data[i * depth + c] = 0;
-      }
-    }
-  }
-}
-
-template <typename T>
-inline void LogSoftmaxQuantized(const SoftmaxParams& params,
-                                const size_t outer_size, const size_t depth,
-                                const RuntimeShape& input_shape,
-                                const T* input_data,
-                                const RuntimeShape& output_shape,
-                                T* output_data) {
-  const int32_t input_multiplier = params.input_multiplier;
-  const int32_t input_left_shift = params.input_left_shift;
-  const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
-  const int32_t reverse_scaling_right_shift =
-      params.reverse_scaling_right_shift;
-  const int diff_min = params.diff_min;
-
-  static constexpr T kMinT8 = std::numeric_limits<T>::min();
-  static constexpr T kMaxT8 = std::numeric_limits<T>::max();
-  static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();
-
-  // All IntegerBits must agree with Prepare function.
-  // Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
-  static constexpr int kInputIntegerBits = 5;
-  static constexpr int kAccumulationIntegerBits = 12;
-  static constexpr int kOutputIntegerBits = 4;
-  using F5 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-  using F12 = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
-
-  for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) {
-    T max_in_row = kMinT8;
-    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
-      max_in_row =
-          std::max(max_in_row, input_data[outer_index * depth + inner_index]);
-    }
-
-    // Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps.
-    F12 sum_of_exps_in_q12 = F12::FromRaw(0);
-    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
-          max_in_row;
-      if (input_diff >= diff_min) {
-        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
-            input_diff, input_multiplier, input_left_shift);
-        sum_of_exps_in_q12 =
-            sum_of_exps_in_q12 +
-            gemmlowp::Rescale<kAccumulationIntegerBits>(
-                exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
-      }
-    }
-
-    const int32_t log_sum_of_exps_in_q5 =
-        log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
-            sum_of_exps_in_q12)
-            .raw();
-
-    // This potentially reduces the valid range: shifted_log_sum_of_exps_in_q5
-    // is the smallest value representable in Q5.26 plus the log_sum_of_exps.
-    const int32_t shifted_log_sum_of_exps_in_q5 =
-        log_sum_of_exps_in_q5 + kMinInt32;
-    const int32_t adjusted_diff_min =
-        std::max(static_cast<int32_t>(diff_min - 1),
-                 MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
-                                               reverse_scaling_divisor,
-                                               -reverse_scaling_right_shift));
-
-    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
-          max_in_row;
-      // Note use of > below instead of >= above.
-      if (input_diff > adjusted_diff_min) {
-        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
-            input_diff, input_multiplier, input_left_shift);
-
-        // Rescale and downcast.
-        int32_t output_in_q27 =
-            gemmlowp::RoundingDivideByPOT(
-                (input_diff_in_q5 - log_sum_of_exps_in_q5),
-                31 - kInputIntegerBits - kOutputIntegerBits) +
-            kMaxT8;
-
-        output_in_q27 =
-            std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxT8)),
-                     static_cast<int32_t>(kMinT8));
-        output_data[outer_index * depth + inner_index] =
-            static_cast<T>(output_in_q27);
-      } else {
-        output_data[outer_index * depth + inner_index] = kMinT8;
-      }
-    }
-  }
-}
-
-inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size,
-                       const size_t depth, const RuntimeShape& input_shape,
-                       const int8_t* input_data,
-                       const RuntimeShape& output_shape, int8_t* output_data) {
-  LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data,
-                      output_shape, output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
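
Note: the removed float LogSoftmax relies on the max-shift identity log(exp(x_i)/sum_j exp(x_j)) = (x_i - max) - log(sum_j exp(x_j - max)) for numerical stability; the quantized variants implement the same idea in fixed point. Small float demonstration:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      const float x[3] = {1.f, 2.f, 3.f};
      const float max = *std::max_element(x, x + 3);
      float sum = 0.f;
      for (float v : x) sum += std::exp(v - max);
      const float log_sum = std::log(sum);
      for (float v : x) std::printf("%.4f ", v - max - log_sum);
      std::printf("\n");  // roughly -2.4076 -1.4076 -0.4076
      return 0;
    }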

+ 0 - 132
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h

@@ -1,132 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
-
-#include <cmath>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
-                     const RuntimeShape& output_shape, float* output_data) {
-  const float cutoff_upper = 16.619047164916992188f;
-  const float cutoff_lower = -9.f;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // Rationale for using an approximation in the reference kernel:
-  // 0. This approximation gives enough precision for float.
-  // 1. It works around an issue on an embedded chipset where exp() does not
-  // behave as expected - exp(x) should return inf on overflow, not 1.701417;
-  // IEEE 754 defines a representation for inf.
-  // 2. It speeds up the calculation and matches the behavior of the
-  // optimized kernels. (See the definition of scalar_logistic_op<float>.)
-
-  for (int i = 0; i < flat_size; i++) {
-    float val = input_data[i];
-    float result;
-    if (val > cutoff_upper) {
-      result = 1.0f;
-    } else if (val < cutoff_lower) {
-      result = std::exp(val);
-    } else {
-      result = 1.f / (1.f + std::exp(-val));
-    }
-    output_data[i] = result;
-  }
-}
-
-// Convenience version that allows, for example, generated-code calls to be
-// uniform between data types.
-inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
-                     const float* input_data, const RuntimeShape& output_shape,
-                     float* output_data) {
-  // Drop params: not needed.
-  Logistic(input_shape, input_data, output_shape, output_data);
-}
-
-inline void Logistic(const LogisticParams& params,
-                     const RuntimeShape& input_shape, const int16_t* input_data,
-                     const RuntimeShape& output_shape, int16_t* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    // This is the return type of math functions such as tanh, logistic,
-    // whose range is in [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-    // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
-    using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-
-    const F3 input = F3::FromRaw(input_data[i]);
-    F0 output = gemmlowp::logistic(input);
-    output_data[i] = output.raw();
-  }
-}
-
-// Quantized int8_t logistic activation.  Cheats by dequantizing and
-// requantizing around the floating point logistic method.  This implementation
-// is slow on platforms without a floating point unit.
-
-// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
-// approach used in TFLite for int8_t Logistic.
-inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
-                     float input_scale, int input_zero_point,
-                     const RuntimeShape& output_shape, int8_t* output_data,
-                     float output_scale, int output_zero_point) {
-  const float cutoff_upper = 16.619047164916992188f;
-  const float cutoff_lower = -9.f;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // Rationale for using the approximation in the reference kernel:
-  // 0. This approximation gives enough precision for float.
-  // 1. It works around an issue on an embedded chipset where exp() does not
-  // return the expected result: exp(x) should return inf when it overflows,
-  // not 1.701417 (IEEE 754 defines a representation for inf).
-  // 2. It speeds up the calculation and matches the behavior of the
-  // optimized kernels (see the definition of scalar_logistic_op<float>).
-
-  for (int i = 0; i < flat_size; i++) {
-    // Dequantize.
-    float val =
-        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
-    float result;
-    if (val > cutoff_upper) {
-      result = 1.0f;
-    } else if (val < cutoff_lower) {
-      result = std::exp(val);
-    } else {
-      result = 1.f / (1.f + std::exp(-val));
-    }
-    // Requantize
-    int8_t output =
-        static_cast<int8_t>(result / output_scale + output_zero_point);
-    output_data[i] = output;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
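For orientation, a minimal sketch of calling the float Logistic reference kernel removed above; it assumes the reference_ops headers remain available from upstream TensorFlow Lite Micro, and the tensor values are illustrative only.

#include "tensorflow/lite/kernels/internal/reference/logistic.h"

void LogisticSketch() {
  const float input[4] = {-10.f, -1.f, 0.f, 20.f};
  float output[4] = {};
  const tflite::RuntimeShape shape({4});  // flat tensor of four elements
  tflite::reference_ops::Logistic(shape, input, shape, output);
  // output ~= {4.5e-5, 0.269, 0.5, 1.0}: inputs below cutoff_lower use
  // std::exp(val) directly, inputs above cutoff_upper saturate to 1.0f.
}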

+ 0 - 422
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h

@@ -1,422 +0,0 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
-#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void LstmCell(
-    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
-    const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
-    const float* prev_activ_data, const RuntimeShape& weights_shape,
-    const float* weights_data, const RuntimeShape& unextended_bias_shape,
-    const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
-    const float* prev_state_data,
-    const RuntimeShape& unextended_output_state_shape, float* output_state_data,
-    const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
-    const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
-    const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape prev_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
-  const RuntimeShape bias_shape =
-      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
-  const RuntimeShape prev_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
-  const RuntimeShape output_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
-  const RuntimeShape output_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
-  const RuntimeShape concat_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
-  const RuntimeShape activ_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int batches =
-      MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
-                  output_state_shape, 0, output_activ_shape, 0);
-  const int height =
-      MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
-                  output_state_shape, 1, output_activ_shape, 1);
-  const int width =
-      MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
-                  output_state_shape, 2, output_activ_shape, 2);
-  const int input_depth = input_shape.Dims(3);
-  const int prev_activ_depth = prev_activ_shape.Dims(3);
-  const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
-                   total_input_depth);
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
-  const int intern_activ_depth =
-      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
-  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
-                   intern_activ_depth * total_input_depth);
-  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
-  const int output_depth =
-      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
-                  3, output_activ_shape, 3);
-  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
-
-  // Concatenate prev_activ and input data together
-  float const* concat_input_arrays_data[2] = {input_data, prev_activ_data};
-  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
-                                                       &prev_activ_shape};
-  tflite::ConcatenationParams concat_params;
-  concat_params.axis = 3;
-  concat_params.inputs_count = 2;
-  Concatenation(concat_params, concat_input_arrays_shapes,
-                concat_input_arrays_data, concat_temp_shape, concat_temp_data);
-
-  // Fully connected
-  tflite::FullyConnectedParams fc_params;
-  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
-  fc_params.float_activation_max = std::numeric_limits<float>::max();
-  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
-                 weights_data, bias_shape, bias_data, activ_temp_shape,
-                 activ_temp_data);
-
-  // Memory state update (the LSTM "guts")
-  for (int b = 0; b < batches; ++b) {
-    for (int w = 0; w < width; ++w) {
-      for (int h = 0; h < height; ++h) {
-        for (int c = 0; c < output_depth; ++c) {
-          const float input_gate =
-              1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
-                                                      0 * output_depth + c)]));
-          const float new_input = std::tanh(activ_temp_data[Offset(
-              activ_temp_shape, b, h, w, 1 * output_depth + c)]);
-          const float forget_gate =
-              1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
-                                                      2 * output_depth + c)]));
-          const float output_gate =
-              1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
-                                                      3 * output_depth + c)]));
-          const float new_state =
-              input_gate * new_input +
-              forget_gate *
-                  prev_state_data[Offset(prev_state_shape, b, h, w, c)];
-          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
-          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
-              output_gate * std::tanh(new_state);
-        }
-      }
-    }
-  }
-}
-
-// Quantized LSTM cell implementation.
-// The quantization of the input, output arrays is as follows:
-//  - The input activations are quantized as uint8 on the interval
-//    [-1, 127/128].
-//    The rationale is that this is the natural interval for output
-//    activations (see next point) and these need to be concatenated together.
-//    We could accommodate different ranges by re-scaling, but we empirically
-//    found that setting the input activations range to be [-1, 127/128] in the
-//    first place, removing the need for re-scaling, greatly improves accuracy.
-//  - The output activations are quantized as uint8 on the interval
-//    [-1, 127/128].
-//    The rationale for that is that the definition of an LSTM cell makes them
-//    intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
-//    makes for simpler, more accurate fixed-point arithmetic.
-//  - The output-at-previous-timestep state array is obviously quantized as
-//    the output activations.
-//  - The internal LSTM memory (not the output-at-previous-timestep, the other
-//    internal state array) is int16-quantized and may use any power-of-two,
-//    symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
-//    StateIntegerBits below, see the below discussion of that template
-//    parameter ("The StateIntegerBits template parameter").
-//  - The output of the internal fully-connected node is int16-quantized
-//    on the interval [-8, 8 * 32767/32768], the rationale for which is
-//    explained just below ("Why [-8, 8] for fully-connected output?").
-//
-//
-// === The StateIntegerBits template parameter ===
-//
-// The StateIntegerBits template parameter controls the fixed-point format used
-// to represent the internal memory of the LSTM cell (not the
-// output-at-previous-timestep, the other internal state array). It's currently
-// a template parameter so that the model can control that. The most typical
-// value for StateIntegerBits is 4. Other plausible values are anywhere between
-// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
-// and drop that template parameter. The reason why it can't be a runtime
-// parameter is that this controls the fixed-point format used, i.e. we need to
-// generate actually different code based on it. In particular, we generate code
-// for a fixed-point tanh() implementation for that format, which internally
-// uses a fixed-point exp() implementation, which internally uses a
-// barrel-shifter with a number of steps that depends on StateIntegerBits.
-// Another consequence of that is that a higher value of StateIntegerBits
-// results in a more expensive implementation (more barrel shifter steps
-// needed).
-//
-//
-// === Why [-8, 8] for fully-connected output? ===
-//
-// This array is only fed to Logistic and Tanh functions, for which
-// the quantized implementation will want to use fixed-point arithmetic,
-// requiring a power-of-two representation interval. Thus, we should right
-// away quantize this array to a power-of-two interval; otherwise,
-// implementation will need to rescale that, losing any benefit that a tighter
-// representation interval might otherwise yield, while introducing some
-// numerical error and computational overhead.
-//
-// Now, Logistic and Tanh
-// are nearly constant (nearly equal to their horizontal asymptotes)
-// outside of a small bounded interval around 0:
-//
-//   Logistic(4) = 1 - 1.8e-2     Tanh(4) = 1 - 6.7e-4
-//   Logistic(8) = 1 - 3.4e-4     Tanh(8) = 1 - 2.3e-7
-//   Logistic(16) = 1 - 1.1e-7    Tanh(16) = 1 - 2.5e-14
-//
-// From this, we see that clamping to [-4, 4] would be too inaccurate
-// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
-// while clamping to [-16, 16] would make no difference even in float32.
-// However, for a fixed-point implementation in 16-bit integers, using 5
-// integer bits to represent the [-16, 16] range would leave only 11
-// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
-// representable values. Notice that this is higher than the
-// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
-// Using [-8, 8] thus seems like the better compromise overall, enjoying
-// an increment of 2.4e-4 between representable values and a worst-case
-// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
-// [-16, 16].
-//
-// Moreover, all other things being equal, it is nice to choose the narrower
-// representation range, as that makes the implementation of fixed-point
-// math functions a little cheaper (each integer bit requires an additional
-// barrel-shifter step in the implementation of exp(-x)). That is further
-// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
-// sense for 32-bit float or 32-bit fixed-point quantization, but we are
-// aiming for 16-bit fixed-point quantization of these internal nodes here.
-//
-template <int StateIntegerBits>
-inline void LstmCell(const LstmCellParams& params,
-                     const RuntimeShape& unextended_input_shape,
-                     const uint8_t* input_data_uint8,
-                     const RuntimeShape& unextended_prev_activ_shape,
-                     const uint8_t* prev_activ_data_uint8,
-                     const RuntimeShape& weights_shape,
-                     const uint8_t* weights_data_uint8,
-                     const RuntimeShape& unextended_bias_shape,
-                     const int32_t* bias_data_int32,
-                     const RuntimeShape& unextended_prev_state_shape,
-                     const int16_t* prev_state_data_int16,
-                     const RuntimeShape& unextended_output_state_shape,
-                     int16_t* output_state_data_int16,
-                     const RuntimeShape& unextended_output_activ_shape,
-                     uint8_t* output_activ_data_uint8,
-                     const RuntimeShape& unextended_concat_temp_shape,
-                     uint8_t* concat_temp_data_uint8,
-                     const RuntimeShape& unextended_activ_temp_shape,
-                     int16_t* activ_temp_data_int16, void* gemmlowp_context) {
-  (void)gemmlowp_context;  // only used in optimized code.
-  int32_t weights_zero_point = params.weights_zero_point;
-  int32_t accum_multiplier = params.accum_multiplier;
-  int accum_shift = params.accum_shift;
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape prev_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
-  const RuntimeShape bias_shape =
-      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
-  const RuntimeShape prev_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
-  const RuntimeShape output_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
-  const RuntimeShape output_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
-  const RuntimeShape concat_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
-  const RuntimeShape activ_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-
-  // Gather dimensions information, and perform consistency checks.
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int outer_size = MatchingFlatSizeSkipDim(
-      input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
-      output_activ_shape);
-  const int input_depth = input_shape.Dims(3);
-  const int prev_activ_depth = prev_activ_shape.Dims(3);
-  const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
-                   total_input_depth);
-  const int intern_activ_depth =
-      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
-  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
-                   intern_activ_depth * total_input_depth);
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
-  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
-  const int output_depth =
-      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
-                  3, output_activ_shape, 3);
-  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
-  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
-  const int fc_output_depth =
-      MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
-  const int fc_accum_depth = total_input_depth;
-  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
-
-  // Depth-concatenate prev_activ and input data together.
-  uint8_t const* concat_input_arrays_data[2] = {input_data_uint8,
-                                                prev_activ_data_uint8};
-  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
-                                                       &prev_activ_shape};
-  tflite::ConcatenationParams concat_params;
-  concat_params.axis = 3;
-  concat_params.inputs_count = 2;
-  Concatenation(concat_params, concat_input_arrays_shapes,
-                concat_input_arrays_data, concat_temp_shape,
-                concat_temp_data_uint8);
-
-  // Implementation of the fully connected node inside the LSTM cell.
-  // The operands are 8-bit integers, the accumulators are internally 32bit
-  // integers, and the output is 16-bit fixed-point with 3 integer bits so
-  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
-  // is explained in the function comment above.
-  for (int b = 0; b < fc_batches; ++b) {
-    for (int out_c = 0; out_c < fc_output_depth; ++out_c) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum = bias_data_int32[out_c];
-      // Accumulation loop.
-      for (int d = 0; d < fc_accum_depth; ++d) {
-        int16_t input_val =
-            concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
-        int16_t weights_val =
-            weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
-        accum += input_val * weights_val;
-      }
-      // Down-scale the final int32 accumulator to the scale used by our
-      // (16-bit, using 3 integer bits) fixed-point format. The quantized
-      // multiplier and shift here have been pre-computed offline
-      // (e.g. by toco).
-      accum =
-          MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
-      // Saturate, cast to int16, and store to the temporary activations array.
-      accum = std::max(-32768, std::min(32767, accum));
-      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
-    }
-  }
-
-  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
-  // and muls, all done in 16-bit fixed-point.
-  for (int b = 0; b < outer_size; ++b) {
-    for (int c = 0; c < output_depth; ++c) {
-      // Define the fixed-point data types that we will use here. All use
-      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
-      // They only differ by the number of integral vs. fractional bits,
-      // determining the range of values that they can represent.
-      //
-      // F0 uses 0 integer bits, range [-1, 1].
-      // This is the return type of math functions such as tanh, logistic,
-      // whose range is in [-1, 1].
-      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-      // F3 uses 3 integer bits, range [-8, 8].
-      // This is the range of the previous fully-connected node's output,
-      // which is our input here.
-      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
-      // 2^StateIntegerBits]. It's used to represent the internal state, whose
-      // number of integer bits is currently dictated by the model. See comment
-      // on the StateIntegerBits template parameter above.
-      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
-      // Implementation of input gate, using fixed-point logistic function.
-      F3 input_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
-      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
-      // Implementation of input modulation gate, using fixed-point tanh
-      // function.
-      F3 input_modulation_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
-      F0 input_modulation_gate_output =
-          gemmlowp::tanh(input_modulation_gate_input);
-      // Implementation of forget gate, using fixed-point logistic function.
-      F3 forget_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
-      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
-      // Implementation of output gate, using fixed-point logistic function.
-      F3 output_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
-      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
-      // Implementation of internal multiplication nodes, still in fixed-point.
-      F0 input_times_input_modulation =
-          input_gate_output * input_modulation_gate_output;
-      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
-      FS prev_state_times_forget_state = forget_gate_output * prev_state;
-      // Implementation of internal addition node, saturating.
-      FS new_state = gemmlowp::SaturatingAdd(
-          gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
-          prev_state_times_forget_state);
-      // Implementation of last internal Tanh node, still in fixed-point.
-      // Since a Tanh fixed-point implementation is specialized for a given
-      // number of integer bits, and each specialization can have a substantial
-      // code size, and we already used above a Tanh on an input with 3 integer
-      // bits, and per the table in the above function comment there is no
-      // significant accuracy to be lost by clamping to [-8, +8] for a
-      // 3-integer-bits representation, let us just do that. This helps people
-      // porting this to targets where code footprint must be minimized.
-      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
-      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
-      // Store the new internal state back to memory, as 16-bit integers.
-      // Note: here we store the original value with StateIntegerBits, not
-      // the rescaled 3-integer-bits value fed to tanh.
-      output_state_data_int16[b * output_depth + c] = new_state.raw();
-      // Down-scale the output activations to 8-bit integers, saturating,
-      // and store back to memory.
-      int16_t rescaled_output_activ =
-          gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
-      int16_t clamped_output_activ = std::max<int16_t>(
-          -128, std::min<int16_t>(127, rescaled_output_activ));
-      output_activ_data_uint8[b * output_depth + c] =
-          128 + clamped_output_activ;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
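The long comment above argues for a [-8, 8] interval over [-16, 16] for the fully-connected output; a small standalone check of that arithmetic (illustrative only, not part of the removed file):

#include <cmath>
#include <cstdio>

int main() {
  // gemmlowp::FixedPoint<int16_t, N> covers roughly [-2^N, 2^N) with a step
  // size of 2^(N - 15).
  const double step_8 = std::ldexp(1.0, 3 - 15);   // [-8, 8]   -> ~2.4e-4
  const double step_16 = std::ldexp(1.0, 4 - 15);  // [-16, 16] -> ~4.9e-4
  // Worst-case clamping error when logistic inputs are clipped to [-8, 8].
  const double clamp_err = 1.0 - 1.0 / (1.0 + std::exp(-8.0));  // ~3.4e-4
  std::printf("step[-8,8]=%g  step[-16,16]=%g  clamp_err=%g\n",
              step_8, step_16, clamp_err);
  return 0;
}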

+ 0 - 64
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h

@@ -1,64 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T, typename Op, int N = 5>
-void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
-                                 const T* input1_data,
-                                 const RuntimeShape& unextended_input2_shape,
-                                 const T* input2_data,
-                                 const RuntimeShape& unextended_output_shape,
-                                 T* output_data, Op op) {
-  // Uses element-wise calculation if broadcast is not required.
-  if (unextended_input1_shape == unextended_input2_shape) {
-    const int flat_size =
-        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
-                             unextended_output_shape);
-    for (int i = 0; i < flat_size; ++i) {
-      output_data[i] = op(input1_data[i], input2_data[i]);
-    }
-  } else {
-    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-    NdArrayDesc<N> desc1;
-    NdArrayDesc<N> desc2;
-    NdArrayDesc<N> output_desc;
-    NdArrayDescsForElementwiseBroadcast(
-        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
-    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                   &output_desc);
-
-    auto maxmin_func = [&](int indexes[N]) {
-      output_data[SubscriptToIndex(output_desc, indexes)] =
-          op(input1_data[SubscriptToIndex(desc1, indexes)],
-             input2_data[SubscriptToIndex(desc2, indexes)]);
-    };
-    NDOpsHelper<N>(output_desc, maxmin_func);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
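A minimal sketch of calling the broadcast helper removed above for an element-wise int32 maximum (equal shapes, so the fast path is taken); it assumes the header remains available upstream and the values are illustrative:

#include <algorithm>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"

void MaxSketch() {
  const int32_t a[4] = {1, 5, -2, 7};
  const int32_t b[4] = {3, 2, 0, 7};
  int32_t out[4] = {};
  const tflite::RuntimeShape shape({4});
  tflite::reference_ops::MaximumMinimumBroadcastSlow(
      shape, a, shape, b, shape, out,
      [](int32_t x, int32_t y) { return std::max(x, y); });
  // out = {3, 5, 0, 7}; with differing input shapes the NdArrayDesc
  // broadcast path would be used instead.
}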

+ 0 - 37
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h

@@ -1,37 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void Negate(const RuntimeShape& input_shape, const T* input_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = -input_data[i];
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
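For completeness, a one-line sketch of the Negate kernel removed above (header assumed to remain available upstream; values illustrative):

#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/neg.h"

void NegateSketch() {
  const int32_t input[3] = {1, -2, 3};
  int32_t output[3] = {};
  const tflite::RuntimeShape shape({3});
  tflite::reference_ops::Negate(shape, input, shape, output);
  // output = {-1, 2, -3}
}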

+ 0 - 169
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h

@@ -1,169 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
-
-#include <vector>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// TFLite Pad supports activation tensors with up to 5 dimensions.
-constexpr int PadKernelMaxDimensionCount() { return 5; }
-
-// There are two versions of pad: Pad and PadV2.  In PadV2 there is a second
-// scalar input that provides the padding value.  Therefore pad_value_ptr can be
-// equivalent to a simple input1_data.  For Pad, it should point to a zero
-// value.
-//
-// Note that two typenames are required, so that T=P=int32_t is considered a
-// specialization distinct from P=int32_t.
-template <typename T, typename P>
-inline void PadImpl(const tflite::PadParams& op_params,
-                    const RuntimeShape& input_shape, const T* input_data,
-                    const P* pad_value_ptr, const RuntimeShape& output_shape,
-                    T* output_data) {
-  const RuntimeShape ext_input_shape =
-      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
-  const RuntimeShape ext_output_shape =
-      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
-  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
-  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
-
-  // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
-  // pad them to 5 dims (yes, we are "padding the padding").
-  int left_padding_copy[PadKernelMaxDimensionCount()];
-  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
-    left_padding_copy[i] = 0;
-  }
-  for (int i = 0; i < op_params.left_padding_count; ++i) {
-    left_padding_copy[i + PadKernelMaxDimensionCount() -
-                      op_params.left_padding_count] = op_params.left_padding[i];
-  }
-  int right_padding_copy[PadKernelMaxDimensionCount()];
-  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
-    right_padding_copy[i] = 0;
-  }
-  for (int i = 0; i < op_params.right_padding_count; ++i) {
-    right_padding_copy[i + PadKernelMaxDimensionCount() -
-                       op_params.right_padding_count] =
-        op_params.right_padding[i];
-  }
-
-  const int output_batch = ext_output_shape.Dims(0);
-  const int output_plane = ext_output_shape.Dims(1);
-  const int output_height = ext_output_shape.Dims(2);
-  const int output_width = ext_output_shape.Dims(3);
-  const int output_depth = ext_output_shape.Dims(4);
-
-  const int left_b_padding = left_padding_copy[0];
-  const int left_p_padding = left_padding_copy[1];
-  const int left_h_padding = left_padding_copy[2];
-  const int left_w_padding = left_padding_copy[3];
-  const int left_d_padding = left_padding_copy[4];
-
-  const int right_b_padding = right_padding_copy[0];
-  const int right_p_padding = right_padding_copy[1];
-  const int right_h_padding = right_padding_copy[2];
-  const int right_w_padding = right_padding_copy[3];
-  const int right_d_padding = right_padding_copy[4];
-
-  const T pad_value = *pad_value_ptr;
-
-  const T* in_ptr = input_data;
-  T* out_ptr = output_data;
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_p = 0; out_p < output_plane; ++out_p) {
-      for (int out_h = 0; out_h < output_height; ++out_h) {
-        for (int out_w = 0; out_w < output_width; ++out_w) {
-          for (int out_d = 0; out_d < output_depth; ++out_d) {
-            if (out_b < left_b_padding ||
-                out_b >= output_batch - right_b_padding ||
-                out_p < left_p_padding ||
-                out_p >= output_plane - right_p_padding ||
-                out_h < left_h_padding ||
-                out_h >= output_height - right_h_padding ||
-                out_w < left_w_padding ||
-                out_w >= output_width - right_w_padding ||
-                out_d < left_d_padding ||
-                out_d >= output_depth - right_d_padding) {
-              *out_ptr++ = pad_value;
-            } else {
-              *out_ptr++ = *in_ptr++;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename T, typename P>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const T* input_data,
-                const P* pad_value_ptr, const RuntimeShape& output_shape,
-                T* output_data) {
-  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-          output_data);
-}
-
-// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
-template <typename T>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const T* input_data,
-                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
-                T* output_data) {
-  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
-  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
-          output_shape, output_data);
-}
-
-// This version avoids conflicting template matching.
-template <>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const int32_t* input_data,
-                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
-                int32_t* output_data) {
-  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-          output_data);
-}
-
-template <typename T, typename P>
-inline void PadImageStyle(const tflite::PadParams& op_params,
-                          const RuntimeShape& input_shape, const T* input_data,
-                          const P* pad_value_ptr,
-                          const RuntimeShape& output_shape, T* output_data) {
-  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-      output_data);
-}
-
-template <typename P>
-inline void PadImageStyle(const tflite::PadParams& op_params,
-                          const RuntimeShape& input_shape,
-                          const float* input_data, const P* pad_value_ptr,
-                          const RuntimeShape& output_shape,
-                          float* output_data) {
-  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-      output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
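A minimal sketch of the removed reference Pad: one zero of padding on each side of a flat 3-element float tensor (header and PadParams layout assumed from upstream TFLM; values illustrative):

#include "tensorflow/lite/kernels/internal/reference/pad.h"

void PadSketch() {
  tflite::PadParams op_params;
  op_params.left_padding_count = 1;
  op_params.left_padding[0] = 1;
  op_params.right_padding_count = 1;
  op_params.right_padding[0] = 1;
  const float input[3] = {1.f, 2.f, 3.f};
  const float pad_value = 0.f;  // Pad (as opposed to PadV2) pads with zero
  float output[5] = {};
  tflite::reference_ops::Pad(op_params, tflite::RuntimeShape({3}), input,
                             &pad_value, tflite::RuntimeShape({5}), output);
  // output = {0, 1, 2, 3, 0}; internally both shapes are extended to the
  // 5-dimensional form used by PadImpl.
}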

+ 0 - 303
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h

@@ -1,303 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const float* input_data,
-                        const RuntimeShape& output_shape, float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float total = 0.f;
-          float filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              total +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          const float average = total / filter_count;
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(average, params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const uint8_t* input_data,
-                        const RuntimeShape& output_shape,
-                        uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          acc = (acc + filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<uint8_t>(acc);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
-                   const float* input_data, const RuntimeShape& output_shape,
-                   float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float sum_squares = 0.f;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              const float val =
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              sum_squares += val * val;
-              filter_count++;
-            }
-          }
-          const float l2pool_result = std::sqrt(sum_squares / filter_count);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(l2pool_result,
-                                           params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const float* input_data, const RuntimeShape& output_shape,
-                    float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float max = std::numeric_limits<float>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(max, params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const uint8_t* input_data, const RuntimeShape& output_shape,
-                    uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
-  TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          uint8_t max = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<uint8_t>(max, params.quantized_activation_min);
-          max = std::min<uint8_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<uint8_t>(max);
-        }
-      }
-    }
-  }
-}
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
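A minimal sketch of the removed float AveragePool: a single 2x2 window over a 1x2x2x1 tensor (header and PoolParams layout assumed from upstream TFLM; values illustrative):

#include <limits>

#include "tensorflow/lite/kernels/internal/reference/pooling.h"

void AveragePoolSketch() {
  tflite::PoolParams params;
  params.stride_height = 1;
  params.stride_width = 1;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();
  const float input[4] = {1.f, 2.f, 3.f, 4.f};  // NHWC: 1 x 2 x 2 x 1
  float output[1] = {};
  tflite::reference_ops::AveragePool(params,
                                     tflite::RuntimeShape({1, 2, 2, 1}), input,
                                     tflite::RuntimeShape({1, 1, 1, 1}),
                                     output);
  // output[0] = 2.5f; AveragePool returns false only if a pooling window
  // ends up empty.
}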

+ 0 - 809
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc

@@ -1,809 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <utility>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-namespace tensor_utils {
-
-namespace {
-const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
-const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
-}  // namespace
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float* min_value,
-                                     float* max_value, float* scaling_factor) {
-  auto minmax = std::minmax_element(values, values + size);
-  *min_value = *minmax.first;
-  *max_value = *minmax.second;
-
-  PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value,
-                                  *max_value, scaling_factor);
-}
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float min_value,
-                                     float max_value, float* scaling_factor) {
-  const int32_t kScale = 127;
-  const float range = std::max(std::abs(min_value), std::abs(max_value));
-  if (range == 0) {
-    memset(quantized_values, 0, size * sizeof(int8_t));
-    *scaling_factor = 1;
-    return;
-  }
-  *scaling_factor = range / kScale;
-  const float scaling_factor_inv = kScale / range;
-  for (int i = 0; i < size; ++i) {
-    const int32_t quantized_value =
-        static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
-    // Clamp: just in case some odd numeric offset.
-    quantized_values[i] = static_cast<int8_t>(
-        std::min(kScale, std::max(-kScale, quantized_value)));
-  }
-}
-
-void PortableAsymmetricQuantizeFloats(const float* values, const int size,
-                                      int8_t* quantized_values,
-                                      float* scaling_factor, int32_t* offset) {
-  const int32_t kMinScale = -128;
-  const int32_t kMaxScale = 127;
-  const double qmin_double = kMinScale;
-  const double qmax_double = kMaxScale;
-  const auto minmax = std::minmax_element(values, values + size);
-  const double rmin = static_cast<double>(std::min(0.0f, *minmax.first));
-  const double rmax = static_cast<double>(std::max(0.0f, *minmax.second));
-  if (rmin == rmax) {
-    memset(quantized_values, 0, size * sizeof(int8_t));
-    *scaling_factor = 1;
-    *offset = 0;
-    return;
-  } else {
-    double scale = (rmax - rmin) / (qmax_double - qmin_double);
-    const double zero_point_from_min = qmin_double - rmin / scale;
-    const double zero_point_from_max = qmax_double - rmax / scale;
-    const double zero_point_from_min_error =
-        std::abs(qmin_double) + std::abs(rmin / scale);
-    const double zero_point_from_max_error =
-        std::abs(qmax_double) + std::abs(rmax / scale);
-    const double zero_point_double =
-        zero_point_from_min_error < zero_point_from_max_error
-            ? zero_point_from_min
-            : zero_point_from_max;
-    int8_t nudged_zero_point = 0;
-    if (zero_point_double <= qmin_double) {
-      nudged_zero_point = kMinScale;
-    } else if (zero_point_double >= qmax_double) {
-      nudged_zero_point = kMaxScale;
-    } else {
-      nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
-    }
-    *scaling_factor = scale;
-    *offset = nudged_zero_point;
-  }
-  const float scaling_factor_inv = 1.0f / *scaling_factor;
-  for (int i = 0; i < size; ++i) {
-    const int32_t quantized_value = static_cast<int32_t>(
-        TfLiteRound(*offset + values[i] * scaling_factor_inv));
-    quantized_values[i] =
-        std::min(kMaxScale, std::max(kMinScale, quantized_value));
-  }
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
-                                                 int m_rows, int m_cols,
-                                                 const float* vector,
-                                                 int n_batch, float* result) {
-  float* result_in_batch = result;
-  for (int b = 0; b < n_batch; b++) {
-    const float* matrix_ptr = matrix;
-    for (int r = 0; r < m_rows; r++) {
-      float dot_prod = 0.0f;
-      const float* vector_in_batch = vector + b * m_cols;
-      for (int c = 0; c < m_cols; c++) {
-        dot_prod += *matrix_ptr++ * *vector_in_batch++;
-      }
-      *result_in_batch += dot_prod;
-      ++result_in_batch;
-    }
-  }
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result) {
-  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
-    const float batch_scaling_factor = scaling_factors[batch];
-    // Get the address of the first row.
-    const int8_t* row_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      // Initialize the dot product sum for the row to 0.
-      int32_t dotprod = 0;
-#if defined(__GNUC__)
-      // Prefetch the row to cache.
-      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
-                         3 /* temporal locality */);
-#endif
-      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
-        dotprod += (*row_ptr) * (vectors[col]);
-      }  // for col
-      *result += dotprod * batch_scaling_factor;
-      ++result;
-    }  // for row
-  }    // for batch
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result, const float* per_channel_scale,
-    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
-    bool* compute_row_sums, CpuBackendContext* context) {
-  if (input_offset == nullptr) {
-    PortableMatrixBatchVectorMultiplyAccumulate(
-        matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
-    return;
-  }
-  if (!compute_row_sums || *compute_row_sums) {
-    PortableReductionSumVector(matrix, row_sums, m_rows, m_cols);
-    if (compute_row_sums) {
-      *compute_row_sums = false;
-    }
-  }
-
-  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
-    const float batch_scaling_factor = scaling_factors[batch];
-    const int32_t batch_offset = input_offset[batch];
-    const int8_t* row_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      int32_t dotprod = 0;
-      float scale = batch_scaling_factor;
-      if (per_channel_scale) {
-        scale *= per_channel_scale[row];
-      }
-#if defined(__GNUC__)
-      // Prefetch the row to cache.
-      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
-                         3 /* temporal locality */);
-#endif
-      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
-        dotprod += (*row_ptr) * vectors[col];
-      }  // for col
-      dotprod -= row_sums[row] * batch_offset;
-      *result += dotprod * scale;
-      ++result;
-    }  // for row
-  }    // for batch
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
-  const int kBlockSize = 4;
-  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; batch++) {
-    const float* matrix_ptr = matrix;
-    for (int row = 0; row < m_rows; row++) {
-      float dot_prod = 0.0f;
-      const float* vector_in_batch = vector + batch * m_cols;
-      for (int i = segments[row]; i < segments[row + 1]; i++) {
-        const int block_start_index = indices[i] * kBlockSize;
-        const float* vector_block_in_batch_ptr =
-            vector_in_batch + block_start_index;
-        for (int c = 0; c < kBlockSize; c++) {
-          dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
-        }
-      }
-      result[batch * m_rows + row] += dot_prod;
-    }
-  }
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result) {
-  const int kBlockSize = 16;
-  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; ++batch) {
-    const int8_t* matrix_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      int32_t dot_prod = 0;
-      const int8_t* vector_in_batch = vector + batch * m_cols;
-      for (int i = segments[row]; i < segments[row + 1]; ++i) {
-        const int block_start_index = indices[i] * kBlockSize;
-        const int8_t* vector_block_in_batch_ptr =
-            vector_in_batch + block_start_index;
-        for (int c = 0; c < kBlockSize; c++) {
-          dot_prod += *matrix_ptr * *vector_block_in_batch_ptr++;
-          dot_prod += *matrix_ptr++ * input_offset;
-        }
-      }
-      const int32_t bias_value = bias_vector != nullptr ? bias_vector[row] : 0;
-      dot_prod = MultiplyByQuantizedMultiplier(dot_prod + bias_value,
-                                               output_multiplier, output_shift);
-      dot_prod += output_offset;
-      result[batch * m_rows + row] =
-          static_cast<int8_t>(ActivationFunctionWithMinMax(
-              dot_prod, output_activation_min, output_activation_max));
-    }
-  }
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result) {
-  const int kBlockSize = 16;
-  TFLITE_DCHECK_EQ(  // NOLINT
-      m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; batch++) {
-    const float* matrix_ptr = matrix;
-    const uint8_t* ledger_ptr = ledger;
-    for (int row = 0; row < m_rows; row++) {
-      float dot_prod = 0.0f;
-      int num_nonzero_blocks = *ledger_ptr++;
-      if (num_nonzero_blocks > 0) {
-        const float* vector_in_batch = vector + batch * m_cols;
-        for (int i = 0; i < num_nonzero_blocks; i++) {
-          const int block_start_index = *ledger_ptr++ * kBlockSize;
-          const float* vector_block_in_batch_ptr =
-              vector_in_batch + block_start_index;
-          for (int c = 0; c < kBlockSize; c++) {
-            dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
-          }
-        }
-      }
-      result[batch * m_rows + row] += dot_prod;
-    }
-  }
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
-    const int m_cols, const int8_t* __restrict__ vectors,
-    const float* scaling_factors, int n_batch, float* __restrict__ result) {
-  static const int kBlockSize = 16;
-  TFLITE_DCHECK_EQ(  // NOLINT
-      m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
-    const float batch_scaling_factor = scaling_factors[batch];
-    const uint8_t* ledger_ptr = ledger;
-    // Get the address of the first row.
-    const int8_t* row_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      // Initialize the dot product sum for the row to 0.
-      int32_t dotprod = 0;
-#if defined(__GNUC__)
-      // Prefetch the row to cache.
-      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
-                         3 /* temporal locality */);
-#endif
-      int num_nonzero_blocks = *ledger_ptr++;
-      for (int i = 0; i < num_nonzero_blocks; i++) {
-        const int block_start_index = *ledger_ptr++ * kBlockSize;
-        const int8_t* vector_block_ptr = vectors + block_start_index;
-        for (int c = 0; c < kBlockSize; c++) {
-          dotprod += (*row_ptr++) * (*vector_block_ptr++);
-        }  // for block
-      }    // for num_nonzero_blocks
-      result[batch * m_rows + row] += dotprod * batch_scaling_factor;
-    }  // for row
-  }    // for batch
-}
-
-template <typename T>
-void PortableMatrixBatchVectorMultiplyAccumulateImpl(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    T* output) {
-  const int16_t output_max = std::numeric_limits<T>::max();
-  const int16_t output_min = std::numeric_limits<T>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int row = 0; row < n_output; ++row) {
-      int32_t acc = bias[row];
-      for (int col = 0; col < n_input; ++col) {
-        int8_t input_val = input[batch * n_input + col];
-        int8_t weights_val = input_to_gate_weights[row * n_input + col];
-        acc += input_val * weights_val;
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc += output_zp;
-      acc += output[batch * n_output + row];
-      if (acc > output_max) {
-        acc = output_max;
-      }
-      if (acc < output_min) {
-        acc = output_min;
-      }
-      output[batch * n_output + row] = static_cast<T>(acc);
-    }
-  }
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulateImpl(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, output);
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulateImpl(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, output);
-}
-
-void PortableMatrixBatchVectorMultiply(const int8_t* input,
-                                       int32_t input_zeropoint,
-                                       const int8_t* input_to_gate_weights,
-                                       int32_t input_to_gate_effective_scale_a,
-                                       int32_t input_to_gate_effective_scale_b,
-                                       int32_t n_batch, int32_t n_input,
-                                       int32_t n_cell, int8_t* gate_output,
-                                       int8_t gate_output_zp) {
-  const int32_t int8_max = std::numeric_limits<int8_t>::max();
-  const int32_t int8_min = std::numeric_limits<int8_t>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int row = 0; row < n_cell; ++row) {
-      int32_t acc = 0;
-      for (int col = 0; col < n_input; ++col) {
-        int32_t input_val = input[batch * n_input + col];
-        int8_t weights_val = input_to_gate_weights[row * n_input + col];
-        acc += (input_val - input_zeropoint) * weights_val;
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
-                                          input_to_gate_effective_scale_b);
-      acc += gate_output_zp;
-      if (acc > int8_max) {
-        acc = int8_max;
-      }
-      if (acc < int8_min) {
-        acc = int8_min;
-      }
-      gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-void PortableMatrixBatchVectorMultiply(
-    const int16_t* hidden, const int8_t* hidden_to_output_weights,
-    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
-    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
-    int32_t n_output, int32_t output_zp, int8_t* proj_output) {
-  const int16_t int8_max = std::numeric_limits<int8_t>::max();
-  const int16_t int8_min = std::numeric_limits<int8_t>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int row = 0; row < n_output; ++row) {
-      int64_t acc = gate_bias[row];
-      for (int col = 0; col < n_hidden; ++col) {
-        int16_t input_val = hidden[batch * n_hidden + col];
-        int8_t weights_val = hidden_to_output_weights[row * n_hidden + col];
-        int64_t curr = acc;
-        acc += input_val * weights_val;
-        if (input_val * weights_val > 0 && acc < curr) {
-          acc = std::numeric_limits<int32_t>::max();
-        }
-        if (input_val * weights_val < 0 && acc > curr) {
-          acc = std::numeric_limits<int32_t>::min();
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a,
-                                          proj_effective_scale_b);
-      acc += output_zp;
-      if (acc > int8_max) {
-        acc = int8_max;
-      }
-      if (acc < int8_min) {
-        acc = int8_min;
-      }
-      proj_output[batch * n_output + row] = acc;
-    }
-  }
-}
-
-void PortableApplyLayerNorm(const int16_t* input,
-                            const int16_t* layer_norm_weights,
-                            const int32_t* bias, int32_t layer_norm_scale_a,
-                            int32_t layer_norm_scale_b, int32_t variance_limit,
-                            int n_batch, int n_input, int16_t* output) {
-  // The square of std::pow(2, 10), which is the extra factor that makes sure
-  // normalized values have enough resolution.
-  static const int kTwoToPower20 = 1 << 20;
-  for (int i = 0; i < n_batch; ++i) {
-    int64_t sum = 0;
-    int64_t sum_sq = 0;
-    for (int j = 0; j < n_input; ++j) {
-      const int32_t index = i * n_input + j;
-      int32_t val = static_cast<int32_t>(input[index]);
-      sum += val;
-      sum_sq += val * val;
-    }
-    int32_t mean =
-        static_cast<int32_t>(static_cast<int64_t>(sum) * 1024 / n_input);
-    // TODO(b/173994730): Avoids overflow but only works for POT n_input.
-    int32_t temp = kTwoToPower20 / n_input;
-    int64_t variance =
-        sum_sq * temp - static_cast<int64_t>(mean) * static_cast<int64_t>(mean);
-    int32_t variance2 = static_cast<int32_t>(variance / kTwoToPower20);
-    if (variance2 < 1) {
-      variance2 = variance_limit;
-    }
-    int32_t stddev_inverse_a;
-    int stddev_inverse_b;
-    GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1,
-                                     &stddev_inverse_a, &stddev_inverse_b);
-
-    for (int j = 0; j < n_input; ++j) {
-      const int32_t index = i * n_input + j;
-      int32_t val = static_cast<int32_t>(input[index]);
-      int32_t shifted = 1024 * val - mean;
-      int32_t rescaled = MultiplyByQuantizedMultiplier(
-          shifted, stddev_inverse_a, stddev_inverse_b);
-      // TODO(jianlijianli): Saturate this.
-      int64_t val3 = rescaled * layer_norm_weights[j] + bias[j];
-      int32_t val4 =
-          static_cast<int32_t>((val3 > 0 ? val3 + 512 : val3 - 512) / 1024);
-      int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a,
-                                                   layer_norm_scale_b + 12);
-      val5 = std::min(std::max(kInt16Min, val5), kInt16Max);
-      output[index] = static_cast<int16_t>(val5);
-    }
-  }
-}
-
-void PortableApplyLayerNormFloat(const int16_t* input,
-                                 const int16_t* layer_norm_weights,
-                                 int32_t layer_norm_scale_a,
-                                 int32_t layer_norm_scale_b,
-                                 const int32_t* bias, int n_batch, int n_input,
-                                 int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  const float layer_norm_scale =
-      layer_norm_scale_a *
-      std::pow(2.0, static_cast<double>(layer_norm_scale_b - 31));
-  const float bias_scale =
-      static_cast<float>(std::pow(2.0, -10)) * layer_norm_scale;
-
-  for (int batch = 0; batch < n_batch; ++batch) {
-    float sum = 0.0f;
-    float sum_sq = 0.0f;
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float value = static_cast<float>(input[index]);
-      sum += value;
-      sum_sq += value * value;
-    }
-    const float mean = sum / n_input;
-    float stddev_inv = 0.0f;
-    const float variance = sum_sq / n_input - mean * mean;
-    if (variance == 0) {
-      stddev_inv = 1.0f / std::sqrt(1e-8f);
-    } else {
-      stddev_inv = 1.0f / std::sqrt(variance);
-    }
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float normalized_value =
-          (static_cast<float>(input[index]) - mean) * stddev_inv;
-      const float weighted_normalized_value =
-          normalized_value * layer_norm_weights[i] * layer_norm_scale +
-          bias[i] * bias_scale;
-      const int32_t quant_output = static_cast<int32_t>(round(
-          weighted_normalized_value * static_cast<float>(std::pow(2, 12))));
-      output[index] = std::min(int16_max, std::max(int16_min, quant_output));
-    }
-  }
-}
-
-void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
-                                            int32_t scalar, int32_t n_row,
-                                            int32_t n_col, int32_t* output) {
-  for (int i = 0; i < n_row; ++i) {
-    int32_t row_sum = 0;
-    for (int j = 0; j < n_col; ++j) {
-      row_sum += *matrix++;
-    }
-    output[i] += row_sum * scalar;
-  }
-}
-
-void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
-                          int32_t n_input, int16_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int c = 0; c < n_input; c++) {
-      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-      const int index = batch * n_input + c;
-      F3 sigmoid_input = F3::FromRaw(input[index]);
-      F0 sigmoid_output = gemmlowp::logistic(sigmoid_input);
-      output[index] = sigmoid_output.raw();
-    }
-  }
-}
-
-void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
-                               int32_t n_input, int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float float_input =
-          input[index] * static_cast<float>(std::pow(2, -12));
-      const float float_output = 1.0f / (1.0f + std::exp(-float_input));
-      const int32_t quant_output = static_cast<int32_t>(
-          float_output * static_cast<float>(std::pow(2, 15)));
-      const int32_t quant_output_clamped =
-          std::min(int16_max, std::max(int16_min, quant_output));
-      output[index] = static_cast<int16_t>(quant_output_clamped);
-    }
-  }
-}
-
-template <int IntegerBits>
-void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch,
-                           int32_t n_input, int16_t* output) {
-  using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
-  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      FX tanh_input = FX::FromRaw(input[index]);
-      F0 tanh_output = gemmlowp::tanh(tanh_input);
-      output[index] = tanh_output.raw();
-    }
-  }
-}
-
-void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
-                       int32_t n_batch, int32_t n_input, int16_t* output) {
-  assert(integer_bits <= 6);
-#define DISPATCH_TANH(i)                                       \
-  case i:                                                      \
-    PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
-    break;
-  switch (integer_bits) {
-    DISPATCH_TANH(0);
-    DISPATCH_TANH(1);
-    DISPATCH_TANH(2);
-    DISPATCH_TANH(3);
-    DISPATCH_TANH(4);
-    DISPATCH_TANH(5);
-    DISPATCH_TANH(6);
-    default:
-      return;
-  }
-#undef DISPATCH_TANH
-}
-
-void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
-                            int32_t n_input, int32_t integer_bits,
-                            int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  const double two = 2.0;
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float float_input =
-          input[index] * std::pow(two, static_cast<double>(integer_bits));
-      const float float_output = std::tanh(float_input);
-      const int32_t quant_output = static_cast<int32_t>(
-          float_output * static_cast<float>(std::pow(2, 15)));
-      const int32_t quant_output_clamped =
-          std::min(int16_max, std::max(int16_min, quant_output));
-      output[index] = static_cast<int16_t>(quant_output_clamped);
-    }
-  }
-}
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int shift, int16_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const int16_t a = input_1[index];
-      const int16_t b = input_2[index];
-      const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
-      output[index] =
-          static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
-    }
-  }
-}
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int32_t multiplier, int32_t shift, int32_t n_batch,
-                      int32_t n_input, int32_t output_zp, int8_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const int16_t a = input_1[index];
-      const int16_t b = input_2[index];
-      int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
-      value = MultiplyByQuantizedMultiplier(value, multiplier, shift);
-      value -= output_zp;
-      value = std::min(std::max(static_cast<int32_t>(-128), value),
-                       static_cast<int32_t>(127));
-
-      output[index] = static_cast<int8_t>(value);
-    }
-  }
-}
-
-void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int16_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      int32_t sum = input_1[index] + input_2[index];
-      const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
-      output[index] = static_cast<int16_t>(sum_clamped);
-    }
-  }
-}
-
-float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
-                                     int v_size) {
-  float result = 0.0;
-  for (int v = 0; v < v_size; v++) {
-    result += *vector1++ * *vector2++;
-  }
-  return result;
-}
-
-namespace {
-inline int32_t VectorVectorDotProduct(const int16_t* vector1,
-                                      const int16_t* vector2, int v_size) {
-  int32_t result = 0;
-  for (int v = 0; v < v_size; v++) {
-    result += *vector1++ * *vector2++;
-  }
-  return result;
-}
-}  // namespace
-
-void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                              const int16_t* vector2,
-                                              int v_size, int n_batch,
-                                              int32_t* result) {
-  for (int b = 0; b < n_batch; b++) {
-    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
-    vector1 += v_size;
-    vector2 += v_size;
-  }
-}
-
-void PortableVectorBatchVectorCwiseProductAccumulate(
-    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
-    int32_t multiplier, int shift, int16_t* result) {
-  for (int b = 0; b < n_batch; b++) {
-    for (int v = 0; v < v_size; v++) {
-      int32_t prod = vector[v] * *batch_vector++;
-      prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift);
-      int32_t output = prod + *result;
-      output = std::max(std::min(static_cast<int32_t>(32767), output),
-                        static_cast<int32_t>(-32768));
-      *result++ = output;
-    }
-  }
-}
-
-void PortableSub1Vector(const float* vector, int v_size, float* result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ = 1.0f - *vector++;
-  }
-}
-
-void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) {
-  static const int16_t kOne = 32767;
-  for (int v = 0; v < v_size; v++) {
-    *result++ = kOne - *vector++;
-  }
-}
-
-void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
-                                  const float scale, float* result) {
-  for (int v = 0; v < v_size; ++v) {
-    *result++ = scale * *vector++;
-  }
-}
-
-void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
-                                     float* __restrict__ output_vector,
-                                     int v_size, int n_batch) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    float sum = 0.0f;
-    for (int i = 0; i < v_size; ++i) {
-      sum += input_vector[i];
-    }
-    const float mean = sum / v_size;
-    float sum_diff_sq = 0.0f;
-    for (int i = 0; i < v_size; ++i) {
-      const float diff = input_vector[i] - mean;
-      sum_diff_sq += diff * diff;
-    }
-    const float variance = sum_diff_sq / v_size;
-    constexpr float kNormalizationConstant = 1e-8f;
-    const float stddev_inv =
-        1.0f / std::sqrt(variance + kNormalizationConstant);
-    for (int i = 0; i < v_size; ++i) {
-      output_vector[i] = (input_vector[i] - mean) * stddev_inv;
-    }
-    input_vector += v_size;
-    output_vector += v_size;
-  }
-}
-
-void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                                  const int8_t* recurrent, int8_t recurrent_zp,
-                                  int32_t input_effective_scale_a,
-                                  int32_t input_effective_scale_b,
-                                  int32_t recurrent_effective_scale_a,
-                                  int32_t recurrent_effective_scale_b,
-                                  int32_t n_batch, int32_t n_cell,
-                                  int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  for (int i = 0; i < n_batch * n_cell; ++i) {
-    int32_t x = static_cast<int32_t>(input[i]) - static_cast<int32_t>(input_zp);
-    int32_t h =
-        static_cast<int32_t>(recurrent[i]) - static_cast<int32_t>(recurrent_zp);
-    int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a,
-                                                     input_effective_scale_b);
-    int32_t h_scaled = MultiplyByQuantizedMultiplier(
-        h, recurrent_effective_scale_a, recurrent_effective_scale_b);
-    int32_t y = h_scaled + x_scaled;
-    if (y > int16_max) {
-      y = int16_max;
-    }
-    if (y < int16_min) {
-      y = int16_min;
-    }
-    output[i] = static_cast<int16_t>(y);
-  }
-}
-
-}  // namespace tensor_utils
-}  // namespace tflite
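
For readers skimming the removed reference kernels above: `PortableSymmetricQuantizeFloats` maps floats to int8 by taking `scale = max(|min|, |max|) / 127` and `q = clamp(round(v / scale), -127, 127)`. A minimal standalone sketch of that same math follows; the example values and the `main` wrapper are illustrative only and not part of the removed file.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Same arithmetic as the removed PortableSymmetricQuantizeFloats, shown standalone.
  const std::vector<float> values = {-0.8f, 0.1f, 0.5f, 1.2f};
  const auto minmax = std::minmax_element(values.begin(), values.end());
  const float range = std::max(std::abs(*minmax.first), std::abs(*minmax.second));
  const float scale = range / 127.0f;  // the reported scaling_factor
  for (const float v : values) {
    const int32_t q = static_cast<int32_t>(std::round(v / scale));
    const int8_t q8 = static_cast<int8_t>(std::min(127, std::max(-127, q)));
    std::printf("%+.2f -> %4d (dequantized %+.3f)\n", v, q8, q8 * scale);
  }
  return 0;
}
```

With symmetric scaling the rounded value can only leave [-127, 127] through floating-point error, which is why the removed code clamps as a final step.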

+ 0 - 333
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h

@@ -1,333 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
-
-#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-namespace tensor_utils {
-
-// Check if all entries of a vector are zero for float.
-bool IsZeroVector(const float* vector, int v_size) {
-  return PortableIsZeroVector(vector, v_size);
-}
-
-// Check if all entries of a vector are zero for int8_t.
-bool IsZeroVector(const int8_t* vector, int v_size) {
-  return PortableIsZeroVector(vector, v_size);
-}
-
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float* min, float* max,
-                             float* scaling_factor) {
-  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
-                                  scaling_factor);
-}
-
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float min_value,
-                             float max_value, float* scaling_factor) {
-  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
-                                  max_value, scaling_factor);
-}
-
-void AsymmetricQuantizeFloats(const float* values, const int size,
-                              int8_t* quantized_values, float* scaling_factor,
-                              int32_t* offset) {
-  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
-                                   scaling_factor, offset);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
-                                         int m_cols, const float* vector,
-                                         int n_batch, float* result) {
-  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
-                                              n_batch, result);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
-                                         const int m_rows, const int m_cols,
-                                         const int8_t* __restrict__ vector,
-                                         const float* scaling_factors,
-                                         int n_batch,
-                                         float* __restrict__ result) {
-  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
-                                              scaling_factors, n_batch, result);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result, const float* per_channel_scale,
-    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
-    bool* compute_row_sums, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(
-      matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
-      per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
-      context);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
-                                         const int m_rows, const int m_cols,
-                                         const int8_t* __restrict__ vector,
-                                         const float* scaling_factors,
-                                         int n_batch, int32_t* scratch,
-                                         float* __restrict__ result,
-                                         CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
-                                              scaling_factors, n_batch, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
-      matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate(
-      matrix, ledger, m_rows, m_cols, vector, n_batch, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
-      matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
-      input_offset, output_multiplier, output_shift, output_offset,
-      output_activation_min, output_activation_max, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
-    const int m_cols, const int8_t* __restrict__ vectors,
-    const float* scaling_factors, int n_batch, float* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate(
-      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
-      result);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, scratch, output, context);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, scratch, output, context);
-}
-
-void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
-                                    int32_t n_row, int32_t n_col,
-                                    int32_t* output) {
-  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
-}
-
-void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
-                               const int8_t* input_to_gate_weights,
-                               int32_t input_to_gate_effective_scale_a,
-                               int32_t input_to_gate_effective_scale_b,
-                               int32_t n_batch, int32_t n_input, int32_t n_cell,
-                               int8_t* gate_output, int8_t gate_output_zp) {
-  PortableMatrixBatchVectorMultiply(
-      input, input_zeropoint, input_to_gate_weights,
-      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
-      n_input, n_cell, gate_output, gate_output_zp);
-}
-
-void MatrixBatchVectorMultiply(const int16_t* hidden,
-                               const int8_t* hidden_to_output_weights,
-                               int32_t proj_effective_scale_a,
-                               int32_t proj_effective_scale_b,
-                               const int32_t* gate_bias, int32_t n_batch,
-                               int32_t n_hidden, int32_t n_output,
-                               int32_t output_zp, int8_t* proj_output) {
-  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
-                                    proj_effective_scale_a,
-                                    proj_effective_scale_b, gate_bias, n_batch,
-                                    n_hidden, n_output, output_zp, proj_output);
-}
-
-void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
-                    const int32_t* bias, int32_t layer_norm_scale_a,
-                    int32_t layer_norm_scale_b, int32_t variance_limit,
-                    int n_batch, int n_input, int16_t* output) {
-  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
-                         layer_norm_scale_b, variance_limit, n_batch, n_input,
-                         output);
-}
-
-void ApplyLayerNormFloat(const int16_t* input,
-                         const int16_t* layer_norm_weights,
-                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
-                         const int32_t* bias, int n_batch, int n_input,
-                         int16_t* output) {
-  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
-                              layer_norm_scale_b, bias, n_batch, n_input,
-                              output);
-}
-
-void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
-                  int16_t* output) {
-  PortableApplySigmoid(input, n_batch, n_input, output);
-}
-
-void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                       int16_t* output) {
-  PortableApplySigmoidFloat(input, n_batch, n_input, output);
-}
-
-void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
-               int32_t n_input, int16_t* output) {
-  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
-}
-
-void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                    int32_t integer_bits, int16_t* output) {
-  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
-}
-
-void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int shift, int16_t* output) {
-  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
-}
-
-void CwiseMul(const int16_t* input_1, const int16_t* input_2,
-              int32_t multiplier, int32_t shift, int32_t n_batch,
-              int32_t n_input, int32_t output_zp, int8_t* output) {
-  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
-                   output_zp, output);
-}
-
-void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int16_t* output) {
-  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
-}
-
-void CwiseClipping(float* vector, const int v_size,
-                   const float clipping_value) {
-  PortableCwiseClipping(vector, v_size, clipping_value);
-}
-
-void CwiseClipping(int16_t* vector, const int v_size,
-                   const int16_t clipping_value) {
-  PortableCwiseClipping(vector, v_size, clipping_value);
-}
-
-void CwiseClipping(int8_t* vector, const int v_size,
-                   const int8_t clipping_value) {
-  PortableCwiseClipping(vector, v_size, clipping_value);
-}
-
-void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
-                                             const int16_t* batch_vector,
-                                             int n_batch, int32_t multiplier,
-                                             int shift, int16_t* result) {
-  PortableVectorBatchVectorCwiseProductAccumulate(
-      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
-}
-
-float VectorVectorDotProduct(const float* vector1, const float* vector2,
-                             int v_size) {
-  return PortableVectorVectorDotProduct(vector1, vector2, v_size);
-}
-
-void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                      const int16_t* vector2, int v_size,
-                                      int n_batch, int32_t* result) {
-  PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
-                                           result);
-}
-
-void Sub1Vector(const float* vector, int v_size, float* result) {
-  PortableSub1Vector(vector, v_size, result);
-}
-
-void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
-  PortableSub1Vector(vector, v_size, result);
-}
-
-// Multiply all elements of the vector by a scalar.
-void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                          float* result) {
-  PortableVectorScalarMultiply(vector, v_size, scale, result);
-}
-
-void ReductionSumVector(const float* input_vector, float* output_vector,
-                        int output_size, int reduction_size) {
-  PortableReductionSumVector(input_vector, output_vector, output_size,
-                             reduction_size);
-}
-
-void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size) {
-  PortableReductionSumVector(input_vector, output_vector, output_size,
-                             reduction_size);
-}
-
-void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size) {
-  PortableReductionSumVector(input_vector, output_vector, output_size,
-                             reduction_size);
-}
-
-void MeanStddevNormalization(const float* input_vector, float* output_vector,
-                             int v_size, int n_batch) {
-  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
-}
-
-void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                          const int8_t* recurrent, int8_t recurrent_zp,
-                          int32_t input_effective_scale_a,
-                          int32_t input_effective_scale_b,
-                          int32_t recurrent_effective_scale_a,
-                          int32_t recurrent_effective_scale_b, int32_t n_batch,
-                          int32_t n_cell, int16_t* output) {
-  PortableTwoGateSaturatingAdd(
-      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
-      input_effective_scale_b, recurrent_effective_scale_a,
-      recurrent_effective_scale_b, n_batch, n_cell, output);
-}
-
-}  // namespace tensor_utils
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
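
The header above is the portable dispatch layer: each `tensor_utils::` entry point forwards verbatim to its `Portable*` counterpart, so optimized backends can provide their own implementations behind the same names. The most heavily used contract is `MatrixBatchVectorMultiplyAccumulate`: for every batch `b` and matrix row `r`, `result[b * m_rows + r] += dot(row r, vector b)`. The sketch below restates that contract in a self-contained form; the function name, sizes, and values are illustrative and not the TFLite API itself.

```cpp
#include <cstdio>
#include <vector>

// Illustrative restatement of the accumulate contract; not the TFLite symbol.
void MatVecAccumulate(const float* matrix, int m_rows, int m_cols,
                      const float* vectors, int n_batch, float* result) {
  for (int b = 0; b < n_batch; ++b) {
    for (int r = 0; r < m_rows; ++r) {
      float dot = 0.0f;
      for (int c = 0; c < m_cols; ++c) {
        dot += matrix[r * m_cols + c] * vectors[b * m_cols + c];
      }
      result[b * m_rows + r] += dot;  // accumulates into result, never overwrites
    }
  }
}

int main() {
  const std::vector<float> matrix = {1, 2, 3, 4};   // 2x2 matrix, row-major
  const std::vector<float> vectors = {1, 1, 2, 0};  // two batch vectors of size 2
  std::vector<float> result(4, 0.0f);               // n_batch * m_rows outputs
  MatVecAccumulate(matrix.data(), 2, 2, vectors.data(), 2, result.data());
  for (const float r : result) std::printf("%.1f ", r);  // prints: 3.0 7.0 2.0 6.0
  std::printf("\n");
  return 0;
}
```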

+ 0 - 244
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h

@@ -1,244 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
-
-#include <algorithm>
-#include <cstdint>
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-
-// Not all backends support CpuBackendContext usage, so forward declare to avoid
-// pulling in its implementation.
-class CpuBackendContext;
-
-namespace tensor_utils {
-
-template <typename T>
-bool PortableIsZeroVector(const T* vector, int v_size) {
-  for (int i = 0; i < v_size; ++i) {
-    if (vector[i] != 0) {
-      return false;
-    }
-  }
-  return true;
-}
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float* min_value,
-                                     float* max_value, float* scaling_factor);
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float min_value,
-                                     float max_value, float* scaling_factor);
-
-void PortableAsymmetricQuantizeFloats(const float* values, const int size,
-                                      int8_t* quantized_values,
-                                      float* scaling_factor, int32_t* offset);
-
-// Multiply a matrix by a batch vector, and store results in a batch-size
-// vector.
-void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
-                                                 int m_rows, int m_cols,
-                                                 const float* vector,
-                                                 int n_batch, float* result);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result, const float* per_channel_scale,
-    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
-    bool* compute_row_sums, CpuBackendContext* context);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vector, const float* scaling_factors,
-    int n_batch, int32_t* scratch, float* __restrict__ result,
-    CpuBackendContext* context);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
-    const int m_cols, const int8_t* __restrict__ vectors,
-    const float* scaling_factors, int n_batch, float* __restrict__ result);
-
-// Dot product of two vectors.
-float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
-                                     int v_size);
-
-void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                              const int16_t* vector2,
-                                              int v_size, int n_batch,
-                                              int32_t* result);
-
-void PortableVectorBatchVectorCwiseProductAccumulate(
-    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
-    int32_t multiplier, int shift, int16_t* result);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int16_t* output, CpuBackendContext* context);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int8_t* output, CpuBackendContext* context);
-
-void PortableMatrixBatchVectorMultiply(const int8_t* input,
-                                       int32_t input_zeropoint,
-                                       const int8_t* input_to_gate_weights,
-                                       int32_t input_to_gate_effective_scale_a,
-                                       int32_t input_to_gate_effective_scale_b,
-                                       int32_t n_batch, int32_t n_input,
-                                       int32_t n_cell, int8_t* gate_output,
-                                       int8_t gate_output_zp);
-
-void PortableMatrixBatchVectorMultiply(
-    const int16_t* hidden, const int8_t* hidden_to_output_weights,
-    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
-    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
-    int32_t n_output, int32_t output_zp, int8_t* proj_output);
-
-void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
-                                            int32_t scalar, int32_t n_row,
-                                            int32_t n_col, int32_t* output);
-
-void PortableApplyLayerNorm(const int16_t* input,
-                            const int16_t* layer_norm_weights,
-                            const int32_t* bias, int32_t layer_norm_scale_a,
-                            int32_t layer_norm_scale_b, int32_t variance_limit,
-                            int n_batch, int n_input, int16_t* output);
-
-void PortableApplyLayerNormFloat(const int16_t* input,
-                                 const int16_t* layer_norm_weights,
-                                 int32_t layer_norm_scale_a,
-                                 int32_t layer_norm_scale_b,
-                                 const int32_t* bias, int n_batch, int n_input,
-                                 int16_t* output);
-
-void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
-                          int32_t n_input, int16_t* output);
-
-void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
-                               int32_t n_input, int16_t* output);
-
-void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
-                       int32_t n_batch, int32_t n_input, int16_t* output);
-
-void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
-                            int32_t n_input, int32_t integer_bits,
-                            int16_t* output);
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int shift, int16_t* output);
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int32_t multiplier, int32_t shift, int32_t n_batch,
-                      int32_t n_input, int32_t output_zp, int8_t* output);
-
-void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int16_t* output);
-
-template <typename T>
-void PortableCwiseClipping(T* vector, const int v_size,
-                           const T& clipping_value) {
-  for (int i = 0; i < v_size; i++) {
-    vector[i] = std::max(std::min(clipping_value, vector[i]),
-                         static_cast<T>(-clipping_value));
-  }
-}
-
-// Batch vector initialization with another vector.
-void PortableVectorBatchVectorAssign(const float* vector, int v_size,
-                                     int n_batch, float* batch_vector);
-
-// Compute "1.0f - elements of vector" (used in CIFG).
-void PortableSub1Vector(const float* vector, int v_size, float* result);
-
-void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);
-
-// Multiply all elements of the vector by a scalar.
-void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                                  float* result);
-
-// Reduce-sum on a vector:
-// input_vector: pointer to input vector.
-// output_vector: pointer to output vector.
-// output_size: output vector size.
-// reduction_size: number of consecutive elements from input vector which are
-// added to get one element of output.
-template <typename INPUT, typename OUTPUT>
-void PortableReductionSumVector(const INPUT* input_vector,
-                                OUTPUT* output_vector, int output_size,
-                                int reduction_size) {
-  for (int o = 0; o < output_size; o++) {
-    OUTPUT result = 0;
-    for (int r = 0; r < reduction_size; r++) {
-      result += input_vector[r];
-    }
-    output_vector[o] = result;
-    input_vector += reduction_size;
-  }
-}
-
-// Layer norm for each batch.
-void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
-                                     float* __restrict__ output_vector,
-                                     int v_size, int n_batch);
-
-// Saturate Add.
-void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                                  const int8_t* recurrent, int8_t recurrent_zp,
-                                  int32_t input_effective_scale_a,
-                                  int32_t input_effective_scale_b,
-                                  int32_t recurrent_effective_scale_a,
-                                  int32_t recurrent_effective_scale_b,
-                                  int32_t n_batch, int32_t n_cell,
-                                  int16_t* output);
-
-}  // namespace tensor_utils
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
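
Among the templates removed above, `PortableReductionSumVector` documents its contract inline: each output element is the sum of `reduction_size` consecutive input elements, which is how the quantized matmul path above builds per-row sums of a row-major int8 matrix. A standalone sketch of that contract follows; the template mirrors the removed one, while the data and `main` wrapper are illustrative only.

```cpp
#include <cstdint>
#include <cstdio>

// Same shape of template as the removed PortableReductionSumVector.
template <typename INPUT, typename OUTPUT>
void ReductionSum(const INPUT* input, OUTPUT* output, int output_size,
                  int reduction_size) {
  for (int o = 0; o < output_size; ++o) {
    OUTPUT acc = 0;
    for (int r = 0; r < reduction_size; ++r) {
      acc += input[r];
    }
    output[o] = acc;
    input += reduction_size;  // advance to the next group of inputs
  }
}

int main() {
  const int8_t matrix[] = {1, 2, 3, 4, 5, 6};  // 2 rows x 3 cols, row-major
  int32_t row_sums[2] = {0, 0};
  ReductionSum(matrix, row_sums, /*output_size=*/2, /*reduction_size=*/3);
  std::printf("%d %d\n", row_sums[0], row_sums[1]);  // prints: 6 15
  return 0;
}
```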

Some files are not shown because too many files changed in this diff