
removed tflite-lib

CaCO3 3 years ago
parent commit 75a653a5c7
100 changed files with 0 additions and 14179 deletions
  1. +0 -22  code/components/tflite-lib/tensorflow/lite/builtin_op_data.h
  2. +0 -525  code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h
  3. +0 -130  code/components/tflite-lib/tensorflow/lite/c/c_api_types.h
  4. +0 -38  code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc
  5. +0 -59  code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h
  6. +0 -68  code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc
  7. +0 -140  code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h
  8. +0 -50  code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc
  9. +0 -28  code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h
  10. +0 -102  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h
  11. +0 -52  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc
  12. +0 -50  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h
  13. +0 -70  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc
  14. +0 -34  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h
  15. +0 -134  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c
  16. +0 -63  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h
  17. +0 -220  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c
  18. +0 -50  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h
  19. +0 -72  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c
  20. +0 -64  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h
  21. +0 -85  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c
  22. +0 -52  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h
  23. +0 -48  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h
  24. +0 -33  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h
  25. +0 -30  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c
  26. +0 -40  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h
  27. +0 -83  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c
  28. +0 -39  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h
  29. +0 -27  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c
  30. +0 -45  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h
  31. +0 -51  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c
  32. +0 -46  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h
  33. +0 -45  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c
  34. +0 -50  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h
  35. +0 -56  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c
  36. +0 -47  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h
  37. +0 -92  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
  38. +0 -57  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h
  39. +0 -70  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c
  40. +0 -49  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h
  41. +0 -73  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c
  42. +0 -45  code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h
  43. +0 -1180  code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h
  44. +0 -122  code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h
  45. +0 -40  code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h
  46. +0 -35  code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h
  47. +0 -35  code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h
  48. +0 -20  code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h
  49. +0 -122  code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h
  50. +0 -484  code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h
  51. +0 -416  code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc
  52. +0 -292  code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h
  53. +0 -400  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h
  54. +0 -86  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h
  55. +0 -88  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h
  56. +0 -275  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h
  57. +0 -101  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h
  58. +0 -91  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h
  59. +0 -56  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h
  60. +0 -97  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h
  61. +0 -37  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h
  62. +0 -280  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h
  63. +0 -141  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h
  64. +0 -287  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h
  65. +0 -175  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h
  66. +0 -79  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h
  67. +0 -100  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
  68. +0 -319  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
  69. +0 -78  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h
  70. +0 -247  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h
  71. +0 -37  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h
  72. +0 -38  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h
  73. +0 -38  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h
  74. +0 -39  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h
  75. +0 -35  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h
  76. +0 -44  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h
  77. +0 -323  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h
  78. +0 -145  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
  79. +0 -238  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
  80. +0 -291  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
  81. +0 -201  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
  82. +0 -67  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
  83. +0 -121  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
  84. +0 -79  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
  85. +0 -133  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
  86. +0 -264  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
  87. +0 -117  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
  88. +0 -224  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
  89. +0 -90  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h
  90. +0 -69  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h
  91. +0 -256  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h
  92. +0 -132  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h
  93. +0 -422  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h
  94. +0 -64  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h
  95. +0 -37  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h
  96. +0 -169  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h
  97. +0 -303  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h
  98. +0 -809  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
  99. +0 -333  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h
  100. +0 -244  code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h

+ 0 - 22
code/components/tflite-lib/tensorflow/lite/builtin_op_data.h

@@ -1,22 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Compatibility shim for new location of interface definitions.
-
-#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
-#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-
-#endif  // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_

+ 0 - 525
code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h

@@ -1,525 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
-#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
-
-#include <stdint.h>
-
-#include "tensorflow/lite/c/common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
-// number of dimensions.
-#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
-
-// TODO(aselle): Consider using "if this then that" for testing.
-
-// Useful placeholder to put in otherwise empty structs to avoid size warnings.
-typedef struct {
-  char dummy;
-} EmptyStructPlaceholder;
-
-// IMPORTANT: All new members of structs must be added at the end to ensure
-// backwards compatibility.
-
-// Possible padding types (for convolutions)
-typedef enum {
-  kTfLitePaddingUnknown = 0,
-  kTfLitePaddingSame,
-  kTfLitePaddingValid,
-} TfLitePadding;
-
-typedef enum {
-  kTfLiteMirrorPaddingUnknown = 0,
-  kTfLiteMirrorPaddingReflect,
-  kTfLiteMirrorPaddingSymmetric,
-} TfLiteMirrorPaddingMode;
-
-// TODO(b/130259536): We should move this out of builtin_op_data.
-typedef struct {
-  int width;
-  int height;
-  int width_offset;
-  int height_offset;
-} TfLitePaddingValues;
-
-typedef struct {
-  TfLiteMirrorPaddingMode mode;
-} TfLiteMirrorPaddingParams;
-
-// Possible fused activation functions.
-typedef enum {
-  kTfLiteActNone = 0,
-  kTfLiteActRelu,
-  kTfLiteActReluN1To1,  // min(max(-1, x), 1)
-  kTfLiteActRelu6,      // min(max(0, x), 6)
-  kTfLiteActTanh,
-  kTfLiteActSignBit,
-  kTfLiteActSigmoid,
-} TfLiteFusedActivation;
-
-typedef struct {
-  // Parameters for CONV_2D version 1.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  TfLiteFusedActivation activation;
-
-  // Parameters for CONV_2D version 2.
-  // Note: Version 2 supports dilation values not equal to 1.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteConvParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int stride_depth;
-  int dilation_width_factor;
-  int dilation_height_factor;
-  int dilation_depth_factor;
-  TfLiteFusedActivation activation;
-} TfLiteConv3DParams;
-
-typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int filter_width;
-  int filter_height;
-  TfLiteFusedActivation activation;
-  struct {
-    TfLitePaddingValues padding;
-  } computed;
-} TfLitePoolParams;
-
-typedef struct {
-  // Parameters for DepthwiseConv version 1 or above.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  // `depth_multiplier` is redundant. It's used by CPU kernels in
-  // TensorFlow 2.0 or below, but ignored in versions above.
-  //
-  // The information can be deduced from the shape of input and the shape of
-  // weights. Since the TFLiteConverter toolchain doesn't support partially
-  // specified shapes, relying on `depth_multiplier` stops us from supporting
-  // graphs with dynamic shape tensors.
-  //
-  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
-  // field.
-  int depth_multiplier;
-  TfLiteFusedActivation activation;
-  // Parameters for DepthwiseConv version 2 or above.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteDepthwiseConvParams;
-
-typedef struct {
-  int rank;
-  TfLiteFusedActivation activation;
-
-  // Parameter for SVDF version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteSVDFParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-
-  // Parameter for RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-
-  // Parameter for Sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteSequenceRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-  bool merge_outputs;
-
-  // Parameter for Bidirectional RNN verison 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceRNNParams;
-
-typedef enum {
-  kTfLiteFullyConnectedWeightsFormatDefault = 0,
-  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
-} TfLiteFullyConnectedWeightsFormat;
-
-typedef struct {
-  // Parameters for FullyConnected version 1 or above.
-  TfLiteFusedActivation activation;
-
-  // Parameters for FullyConnected version 2 or above.
-  TfLiteFullyConnectedWeightsFormat weights_format;
-
-  // Parameters for FullyConnected version 5 or above.
-  // If set to true, then the number of dimensions in the input and the output
-  // tensors are the same. Furthermore, all but the last dimension of the input
-  // and output shapes will be equal.
-  bool keep_num_dims;
-
-  // Parameters for FullyConnected version 7 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteFullyConnectedParams;
-
-typedef enum {
-  kTfLiteLshProjectionUnknown = 0,
-  kTfLiteLshProjectionSparse = 1,
-  kTfLiteLshProjectionDense = 2,
-} TfLiteLSHProjectionType;
-
-typedef struct {
-  TfLiteLSHProjectionType type;
-} TfLiteLSHProjectionParams;
-
-typedef struct {
-  float beta;
-} TfLiteSoftmaxParams;
-
-typedef struct {
-  int axis;
-  TfLiteFusedActivation activation;
-} TfLiteConcatenationParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 4.
-  bool pot_scale_int16;
-} TfLiteAddParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteSpaceToBatchNDParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteBatchToSpaceNDParams;
-
-typedef struct {
-  bool adj_x;
-  bool adj_y;
-  // Parameters for BatchMatMul version 4 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteBatchMatMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 5.
-  bool pot_scale_int16;
-} TfLiteSubParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteDivParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteL2NormParams;
-
-typedef struct {
-  int radius;
-  float bias;
-  float alpha;
-  float beta;
-} TfLiteLocalResponseNormParams;
-
-typedef enum {
-  kTfLiteLSTMFullKernel = 0,
-  kTfLiteLSTMBasicKernel
-} TfLiteLSTMKernelType;
-
-typedef struct {
-  // Parameters for LSTM version 1.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // Parameters for LSTM version 2.
-  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
-  TfLiteLSTMKernelType kernel_type;
-
-  // Parameters for LSTM version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteLSTMParams;
-
-typedef struct {
-  // Parameters needed for the underlying LSTM.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameter for unidirectional sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteUnidirectionalSequenceLSTMParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  // Parameters inherited for the LSTM kernel.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If true, store the outputs of both directions in the first output.
-  bool merge_outputs;
-
-  // Parameters supported by version 2:
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameters supported by version 4:
-  // If set to true, then hybrid ops use asymmetric quantization for inputs.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceLSTMParams;
-
-typedef struct {
-  bool align_corners;
-  // half_pixel_centers assumes pixels are of half the actual dimensions, and
-  // yields more accurate resizes. Corresponds to the same argument for the
-  // original TensorFlow op in TF2.0.
-  bool half_pixel_centers;
-} TfLiteResizeBilinearParams;
-
-typedef struct {
-  bool align_corners;
-  bool half_pixel_centers;
-} TfLiteResizeNearestNeighborParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadV2Params;
-
-typedef struct {
-  // These fields are only used in old models for backward compatibility.
-  // In the current implementation, we use the 2nd input of the op as the shape,
-  // and these fields are unused.
-  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
-  int num_dimensions;
-} TfLiteReshapeParams;
-
-typedef struct {
-  int ngram_size;
-  int max_skip_size;
-  bool include_all_ngrams;
-} TfLiteSkipGramParams;
-
-typedef struct {
-  int block_size;
-} TfLiteSpaceToDepthParams;
-
-typedef struct {
-  int block_size;
-} TfLiteDepthToSpaceParams;
-
-typedef struct {
-  TfLiteType in_data_type;
-  TfLiteType out_data_type;
-} TfLiteCastParams;
-
-typedef enum {
-  kTfLiteCombinerTypeSum = 0,
-  kTfLiteCombinerTypeMean = 1,
-  kTfLiteCombinerTypeSqrtn = 2,
-} TfLiteCombinerType;
-
-typedef struct {
-  TfLiteCombinerType combiner;
-} TfLiteEmbeddingLookupSparseParams;
-
-typedef struct {
-  int axis;
-  int batch_dims;
-} TfLiteGatherParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteTransposeParams;
-
-typedef struct {
-  bool keep_dims;
-} TfLiteReducerParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitVParams;
-
-typedef struct {
-  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
-  // For now we will fix the maximum possible number of dimensions.
-  int squeeze_dims[8];
-  int num_squeeze_dims;
-} TfLiteSqueezeParams;
-
-typedef struct {
-  int begin_mask;
-  int end_mask;
-  int ellipsis_mask;
-  int new_axis_mask;
-  int shrink_axis_mask;
-} TfLiteStridedSliceParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMaxParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMinParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-} TfLiteTransposeConvParams;
-
-typedef struct {
-  bool validate_indices;
-} TfLiteSparseToDenseParams;
-
-typedef struct {
-  TfLiteType out_type;
-} TfLiteShapeParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteRankParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  float min;
-  float max;
-  int num_bits;
-
-  // Parameters supported by version 2:
-  bool narrow_range;
-} TfLiteFakeQuantParams;
-
-typedef struct {
-  int values_count;
-  int axis;
-} TfLitePackParams;
-
-typedef struct {
-  int axis;
-} TfLiteOneHotParams;
-
-typedef struct {
-  int num;
-  int axis;
-} TfLiteUnpackParams;
-
-typedef struct {
-  float alpha;
-} TfLiteLeakyReluParams;
-
-typedef struct {
-  TfLiteType index_out_type;
-} TfLiteUniqueParams;
-
-typedef struct {
-  int seq_dim;
-  int batch_dim;
-} TfLiteReverseSequenceParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixDiagParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixSetDiagParams;
-
-typedef struct {
-  int then_subgraph_index;
-  int else_subgraph_index;
-} TfLiteIfParams;
-
-typedef struct {
-  int cond_subgraph_index;
-  int body_subgraph_index;
-} TfLiteWhileParams;
-
-typedef struct {
-  bool exclusive;
-  bool reverse;
-} TfLiteCumsumParams;
-
-typedef struct {
-  int init_subgraph_index;
-} TfLiteCallOnceParams;
-
-typedef struct {
-  int table_id;
-  TfLiteType key_dtype;
-  TfLiteType value_dtype;
-} TfLiteHashtableParams;
-
-typedef struct {
-  const char* container;
-  const char* shared_name;
-} TfLiteVarHandleParams;
-
-typedef struct {
-  int seed;
-  int seed2;
-} TfLiteRandomParams;
-
-typedef struct {
-  int num_boundaries;
-  // This points to the memory stored in the model (flatbuffer),
-  // and is not owned.
-  const float* boundaries;
-} TfLiteBucketizeParams;
-
-typedef struct {
-  bool approximate;
-} TfLiteGeluParams;
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-
-#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_

+ 0 - 130
code/components/tflite-lib/tensorflow/lite/c/c_api_types.h

@@ -1,130 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This file declares types used by the pure C inference API defined in c_api.h,
-// some of which are also used in the C++ and C kernel and interpreter APIs.
-
-#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_
-#define TENSORFLOW_LITE_C_C_API_TYPES_H_
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
-// library.
-#ifdef SWIG
-#define TFL_CAPI_EXPORT
-#elif defined(TFL_STATIC_LIBRARY_BUILD)
-#define TFL_CAPI_EXPORT
-#else  // not definded TFL_STATIC_LIBRARY_BUILD
-#if defined(_WIN32)
-#ifdef TFL_COMPILE_LIBRARY
-#define TFL_CAPI_EXPORT __declspec(dllexport)
-#else
-#define TFL_CAPI_EXPORT __declspec(dllimport)
-#endif  // TFL_COMPILE_LIBRARY
-#else
-#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
-#endif  // _WIN32
-#endif  // SWIG
-
-// Note that new error status values may be added in future in order to
-// indicate more fine-grained internal states, therefore, applications should
-// not rely on status values being members of the enum.
-typedef enum TfLiteStatus {
-  kTfLiteOk = 0,
-
-  // Generally referring to an error in the runtime (i.e. interpreter)
-  kTfLiteError = 1,
-
-  // Generally referring to an error from a TfLiteDelegate itself.
-  kTfLiteDelegateError = 2,
-
-  // Generally referring to an error in applying a delegate due to
-  // incompatibility between runtime and delegate, e.g., this error is returned
-  // when trying to apply a TF Lite delegate onto a model graph that's already
-  // immutable.
-  kTfLiteApplicationError = 3,
-
-  // Generally referring to serialized delegate data not being found.
-  // See tflite::delegates::Serialization.
-  kTfLiteDelegateDataNotFound = 4,
-
-  // Generally referring to data-writing issues in delegate serialization.
-  // See tflite::delegates::Serialization.
-  kTfLiteDelegateDataWriteError = 5,
-
-  // Generally referring to data-reading issues in delegate serialization.
-  // See tflite::delegates::Serialization.
-  kTfLiteDelegateDataReadError = 6,
-
-  // Generally referring to issues when the TF Lite model has ops that cannot be
-  // resolved at runtime. This could happen when the specific op is not
-  // registered or built with the TF Lite framework.
-  kTfLiteUnresolvedOps = 7,
-} TfLiteStatus;
-
-// Types supported by tensor
-typedef enum {
-  kTfLiteNoType = 0,
-  kTfLiteFloat32 = 1,
-  kTfLiteInt32 = 2,
-  kTfLiteUInt8 = 3,
-  kTfLiteInt64 = 4,
-  kTfLiteString = 5,
-  kTfLiteBool = 6,
-  kTfLiteInt16 = 7,
-  kTfLiteComplex64 = 8,
-  kTfLiteInt8 = 9,
-  kTfLiteFloat16 = 10,
-  kTfLiteFloat64 = 11,
-  kTfLiteComplex128 = 12,
-  kTfLiteUInt64 = 13,
-  kTfLiteResource = 14,
-  kTfLiteVariant = 15,
-  kTfLiteUInt32 = 16,
-  kTfLiteUInt16 = 17,
-} TfLiteType;
-
-// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
-// If per-layer quantization is specified this field will still be populated in
-// addition to TfLiteAffineQuantization.
-// Parameters for asymmetric quantization. Quantized values can be converted
-// back to float using:
-//     real_value = scale * (quantized_value - zero_point)
-typedef struct TfLiteQuantizationParams {
-  float scale;
-  int32_t zero_point;
-} TfLiteQuantizationParams;
-
-// --------------------------------------------------------------------------
-// Opaque types used by c_api.h, c_api_opaque.h and common.h.
-
-// TfLiteOpaqueContext is an opaque version of TfLiteContext;
-typedef struct TfLiteOpaqueContext TfLiteOpaqueContext;
-
-// TfLiteOpaqueNode is an opaque version of TfLiteNode;
-typedef struct TfLiteOpaqueNode TfLiteOpaqueNode;
-
-// TfLiteOpaqueTensor is an opaque version of TfLiteTensor;
-typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor;
-
-#ifdef __cplusplus
-}  // extern C
-#endif
-#endif  // TENSORFLOW_LITE_C_C_API_TYPES_H_
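
For reference, the removed c_api_types.h documents asymmetric quantization as real_value = scale * (quantized_value - zero_point). Below is a minimal, self-contained sketch of that conversion; the QuantizationParams struct only mirrors the fields of TfLiteQuantizationParams for illustration, and the numeric values are made up, not taken from this commit.

    #include <cstdint>
    #include <cstdio>

    // Mirrors the fields of TfLiteQuantizationParams from the removed header.
    struct QuantizationParams {
      float scale;
      int32_t zero_point;
    };

    // real_value = scale * (quantized_value - zero_point)
    float Dequantize(int8_t quantized_value, const QuantizationParams& params) {
      return params.scale *
             (static_cast<int32_t>(quantized_value) - params.zero_point);
    }

    int main() {
      QuantizationParams params{0.5f, -128};  // example values only
      // 0.5 * (-28 - (-128)) = 50.0
      std::printf("%f\n", Dequantize(-28, params));
      return 0;
    }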

+ 0 - 38
code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc

@@ -1,38 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include <cstdarg>
-
-namespace tflite {
-
-int ErrorReporter::Report(const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  int code = Report(format, args);
-  va_end(args);
-  return code;
-}
-
-// TODO(aselle): Make the name of ReportError on context the same, so
-// we can use the ensure functions w/o a context and w/ a reporter.
-int ErrorReporter::ReportError(void*, const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  int code = Report(format, args);
-  va_end(args);
-  return code;
-}
-
-}  // namespace tflite

+ 0 - 59
code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h

@@ -1,59 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
-#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
-
-#include <cstdarg>
-
-namespace tflite {
-
-/// A functor that reports error to supporting system. Invoked similar to
-/// printf.
-///
-/// Usage:
-///  ErrorReporter foo;
-///  foo.Report("test %d", 5);
-/// or
-///  va_list args;
-///  foo.Report("test %d", args); // where args is va_list
-///
-/// Subclass ErrorReporter to provide another reporting destination.
-/// For example, if you have a GUI program, you might redirect to a buffer
-/// that drives a GUI error log box.
-class ErrorReporter {
- public:
-  virtual ~ErrorReporter() {}
-  virtual int Report(const char* format, va_list args) = 0;
-  int Report(const char* format, ...);
-  int ReportError(void*, const char* format, ...);
-};
-
-}  // namespace tflite
-
-// You should not make bare calls to the error reporter, instead use the
-// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
-// stripped when the binary size has to be optimized. If you are looking to
-// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
-// every call will be stubbed out, taking no memory.
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_REPORT_ERROR(reporter, ...)                             \
-  do {                                                                  \
-    static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
-  } while (false)
-#else  // TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_REPORT_ERROR(reporter, ...)
-#endif  // TF_LITE_STRIP_ERROR_STRINGS
-
-#endif  // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
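
The doc comment in the removed error_reporter.h describes subclassing ErrorReporter and preferring the TF_LITE_REPORT_ERROR macro over direct calls. A minimal sketch of that pattern, assuming the removed header is still available on the include path; StderrReporter is a hypothetical name, not something defined in this repository.

    #include <cstdarg>
    #include <cstdio>

    #include "tensorflow/lite/core/api/error_reporter.h"  // removed by this commit

    // Hypothetical reporter that forwards formatted messages to stderr.
    class StderrReporter : public tflite::ErrorReporter {
     public:
      int Report(const char* format, va_list args) override {
        return std::vfprintf(stderr, format, args);
      }
    };

    int main() {
      StderrReporter reporter;
      // Preferred over calling Report() directly: the macro compiles away
      // when TF_LITE_STRIP_ERROR_STRINGS is defined.
      TF_LITE_REPORT_ERROR(&reporter, "test %d\n", 5);
      return 0;
    }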

+ 0 - 68
code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc

@@ -1,68 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/core/api/op_resolver.h"
-
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_utils.h"
-
-namespace tflite {
-
-TfLiteStatus GetRegistrationFromOpCode(
-    const OperatorCode* opcode, const OpResolver& op_resolver,
-    ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
-  TfLiteStatus status = kTfLiteOk;
-  *registration = nullptr;
-  auto builtin_code = GetBuiltinCode(opcode);
-  int version = opcode->version();
-
-  if (builtin_code > BuiltinOperator_MAX) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter,
-        "Op builtin_code out of range: %d. Are you using old TFLite binary "
-        "with newer model?",
-        builtin_code);
-    status = kTfLiteError;
-  } else if (builtin_code != BuiltinOperator_CUSTOM) {
-    *registration = op_resolver.FindOp(builtin_code, version);
-    if (*registration == nullptr) {
-      TF_LITE_REPORT_ERROR(
-          error_reporter,
-          "Didn't find op for builtin opcode '%s' version '%d'. "
-          "An older version of this builtin might be supported. "
-          "Are you using an old TFLite binary with a newer model?\n",
-          EnumNameBuiltinOperator(builtin_code), version);
-      status = kTfLiteError;
-    }
-  } else if (!opcode->custom_code()) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter,
-        "Operator with CUSTOM builtin_code has no custom_code.\n");
-    status = kTfLiteError;
-  } else {
-    const char* name = opcode->custom_code()->c_str();
-    *registration = op_resolver.FindOp(name, version);
-    if (*registration == nullptr) {
-      // Do not report error for unresolved custom op, we do the final check
-      // while preparing ops.
-      status = kTfLiteError;
-    }
-  }
-  return status;
-}
-
-}  // namespace tflite

+ 0 - 140
code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h

@@ -1,140 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
-#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
-
-#include <functional>
-#include <memory>
-#include <vector>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-// Opaque type similar to TfLiteDelegate / TfLiteOpaqueDelegate.
-// This is used for cases (e.g. when using "TF Lite with Google Play Services")
-// where the TF Lite runtime might be built using a newer (or older)
-// version of the TF Lite sources than the app, and hence might have a
-// different definition of the TfLiteDelegate type. TF Lite APIs use
-// TfLiteOpaqueDelegate rather than TfLiteDelegate when they want to
-// refer to a delegate defined with that potentially different version
-// of the TfLiteDelegate type.
-struct TfLiteOpaqueDelegateStruct;
-
-namespace tflite {
-
-/// Abstract interface that returns TfLiteRegistrations given op codes or custom
-/// op names. This is the mechanism that ops being referenced in the flatbuffer
-/// model are mapped to executable function pointers (TfLiteRegistrations).
-class OpResolver {
- public:
-  /// Finds the op registration for a builtin operator by enum code.
-  virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
-                                           int version) const = 0;
-  /// Finds the op registration of a custom operator by op name.
-  virtual const TfLiteRegistration* FindOp(const char* op,
-                                           int version) const = 0;
-
-  // Represents a sequence of delegates.
-  using TfLiteDelegatePtrVector =
-      std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
-
-  // Returns optional delegates for resolving and handling ops in the flatbuffer
-  // model. This may be used in addition to the standard TfLiteRegistration
-  // lookup for graph resolution.
-  // WARNING: This API is deprecated, GetDelegateCreators is preferred.
-  virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
-    return {};
-  }
-
-  // Represents a function that creates a TfLite delegate instance.
-  using TfLiteDelegateCreator =
-      std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
-          int /*num_threads*/)>;
-
-  // Represents a sequence of delegate creator functions.
-  using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
-
-  // Returns a vector of delegate creators to create optional delegates for
-  // resolving and handling ops in the flatbuffer model. This may be used in
-  // addition to the standard TfLiteRegistration lookup for graph resolution.
-  //
-  // Note that this method is not used (will not be called) if you are using
-  // TF Lite in Google Play Services; the GetOpaqueDelegateCreators method
-  // (see below) is used for that case.
-  virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
-
-  // TODO(b/202712825): it would be nice if we could avoid the need for separate
-  // "opaque" types & methods for use only with TF Lite in Google Play Services.
-
-  // Represents an opaque delegate instance.
-  // WARNING: Experimental interface, subject to change.
-  using TfLiteOpaqueDelegatePtr =
-      std::unique_ptr<TfLiteOpaqueDelegateStruct,
-                      void (*)(TfLiteOpaqueDelegateStruct*)>;
-
-  // Represents a function that creates an opaque delegate instance.
-  // WARNING: Experimental interface, subject to change.
-  using TfLiteOpaqueDelegateCreator =
-      std::function<TfLiteOpaqueDelegatePtr(int /*num_threads*/)>;
-
-  // Represents a sequence of opaque delegate creator functions.
-  // WARNING: Experimental interface, subject to change.
-  using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
-
-  // Returns a vector of opaque delegate creators to create optional opaque
-  // delegates for resolving and handling ops in the flatbuffer model. This may
-  // be used in addition to the standard TfLiteRegistration lookup for graph
-  // resolution.
-  //
-  // Note that this method will be called only if you are using TF Lite in
-  // Google Play Services; if you are using regular TF Lite, GetDelegateCreators
-  // (see above) is used instead.
-  //
-  // WARNING: Experimental interface, subject to change.
-  virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const {
-    return {};
-  }
-
-  virtual ~OpResolver() {}
-
- private:
-  /// Returns true if this OpResolver may contain any "user defined" ops.
-  /// By "user defined" ops, we mean any op definitions other than those
-  /// contained in tflite::ops::builtin::BuiltinOpResolver.
-  ///
-  /// If this method returns true, it doesn't necessarily mean that the
-  /// OpResolver contains a user-defined op, just that the absence of
-  /// user-defined ops can't be guaranteed.
-  ///
-  /// Note that "user-defined" ops are not the same as "custom" ops;
-  /// BuiltinOpResolver may support certain "custom" ops, in addition to
-  /// "builtin" ops, and may not support all of the "builtin" op enum values.
-  virtual bool MayContainUserDefinedOps() const { return true; }
-
-  friend class OpResolverInternal;
-};
-
-// Handles the logic for converting between an OperatorCode structure extracted
-// from a flatbuffer and information about a registered operator
-// implementation.
-TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
-                                       const OpResolver& op_resolver,
-                                       ErrorReporter* error_reporter,
-                                       const TfLiteRegistration** registration);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
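
The removed op_resolver.h defines OpResolver as an abstract lookup from op codes or custom op names to TfLiteRegistration pointers. A minimal sketch of a conforming subclass, assuming the removed headers are on the include path; EmptyOpResolver is a hypothetical name and resolves nothing, so GetRegistrationFromOpCode() would return kTfLiteError for every op.

    #include "tensorflow/lite/core/api/op_resolver.h"  // removed by this commit

    // Hypothetical resolver that knows no ops at all; every lookup fails
    // until real registrations are added.
    class EmptyOpResolver : public tflite::OpResolver {
     public:
      const TfLiteRegistration* FindOp(tflite::BuiltinOperator /*op*/,
                                       int /*version*/) const override {
        return nullptr;
      }
      const TfLiteRegistration* FindOp(const char* /*op*/,
                                       int /*version*/) const override {
        return nullptr;
      }
    };

    int main() {
      EmptyOpResolver resolver;
      (void)resolver;  // would be passed to GetRegistrationFromOpCode() etc.
      return 0;
    }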

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc

@@ -1,50 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/core/api/tensor_utils.h"
-
-#include <string.h>
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
-  if (!tensor->is_variable) {
-    return kTfLiteOk;
-  }
-  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
-  // to the value of the buffer.
-  int value = 0;
-  if (tensor->type == kTfLiteInt8) {
-    value = tensor->params.zero_point;
-  }
-  // TODO(b/139446230): Provide a platform header to better handle these
-  // specific scenarios.
-#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
-    defined(__i386) || defined(__x86__) || defined(__X86__) || \
-    defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
-  memset(tensor->data.raw, value, tensor->bytes);
-#else
-  char* raw_ptr = tensor->data.raw;
-  for (size_t i = 0; i < tensor->bytes; ++i) {
-    *raw_ptr = value;
-    raw_ptr++;
-  }
-#endif
-  return kTfLiteOk;
-}
-
-}  // namespace tflite

+ 0 - 28
code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h

@@ -1,28 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-// Resets a variable tensor to the default value.
-TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_

+ 0 - 102
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h

@@ -1,102 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
-
-#ifdef __cplusplus
-#include <cstdint>
-
-extern "C" {
-#endif
-
-static inline int CountLeadingZeros32Slow(uint64_t n) {
-  int zeroes = 28;
-  if (n >> 16) zeroes -= 16, n >>= 16;
-  if (n >> 8) zeroes -= 8, n >>= 8;
-  if (n >> 4) zeroes -= 4, n >>= 4;
-  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
-}
-
-static inline int CountLeadingZeros32(uint32_t n) {
-#if defined(_MSC_VER)
-  unsigned long result = 0;  // NOLINT(runtime/int)
-  if (_BitScanReverse(&result, n)) {
-    return 31 - result;
-  }
-  return 32;
-#elif defined(__GNUC__)
-
-  // Handle 0 as a special case because __builtin_clz(0) is undefined.
-  if (n == 0) {
-    return 32;
-  }
-  return __builtin_clz(n);
-#else
-  return CountLeadingZeros32Slow(n);
-#endif
-}
-
-static inline int MostSignificantBit32(uint32_t n) {
-  return 32 - CountLeadingZeros32(n);
-}
-
-static inline int CountLeadingZeros64Slow(uint64_t n) {
-  int zeroes = 60;
-  if (n >> 32) zeroes -= 32, n >>= 32;
-  if (n >> 16) zeroes -= 16, n >>= 16;
-  if (n >> 8) zeroes -= 8, n >>= 8;
-  if (n >> 4) zeroes -= 4, n >>= 4;
-  return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
-}
-
-static inline int CountLeadingZeros64(uint64_t n) {
-#if defined(_MSC_VER) && defined(_M_X64)
-  // MSVC does not have __builtin_clzll. Use _BitScanReverse64.
-  unsigned long result = 0;  // NOLINT(runtime/int)
-  if (_BitScanReverse64(&result, n)) {
-    return 63 - result;
-  }
-  return 64;
-#elif defined(_MSC_VER)
-  // MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse
-  unsigned long result = 0;  // NOLINT(runtime/int)
-  if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
-    return 31 - result;
-  }
-  if (_BitScanReverse(&result, n)) {
-    return 63 - result;
-  }
-  return 64;
-#elif defined(__GNUC__)
-
-  // Handle 0 as a special case because __builtin_clzll(0) is undefined.
-  if (n == 0) {
-    return 64;
-  }
-  return __builtin_clzll(n);
-#else
-  return CountLeadingZeros64Slow(n);
-#endif
-}
-
-static inline int MostSignificantBit64(uint64_t n) {
-  return 64 - CountLeadingZeros64(n);
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
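
The removed bits.h defines MostSignificantBit32(n) as 32 - CountLeadingZeros32(n), i.e. the 1-based position of the highest set bit (0 when n is 0). A small self-contained check of that identity, assuming the removed header is still reachable on the include path:

    #include <cstdint>
    #include <cstdio>

    #include "tensorflow/lite/experimental/microfrontend/lib/bits.h"  // removed by this commit

    int main() {
      std::printf("%d\n", MostSignificantBit32(0));     // 0 (special case)
      std::printf("%d\n", MostSignificantBit32(1));     // 1
      std::printf("%d\n", MostSignificantBit32(0x10));  // 5: bit 4 is the highest set bit
      return 0;
    }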

+ 0 - 52
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc

@@ -1,52 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-
-#include <string.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
-
-void FftCompute(struct FftState* state, const int16_t* input,
-                int input_scale_shift) {
-  const size_t input_size = state->input_size;
-  const size_t fft_size = state->fft_size;
-
-  int16_t* fft_input = state->input;
-  // First, scale the input by the given shift.
-  size_t i;
-  for (i = 0; i < input_size; ++i) {
-    fft_input[i] = static_cast<int16_t>(static_cast<uint16_t>(input[i])
-                                        << input_scale_shift);
-  }
-  // Zero out whatever else remains in the top part of the input.
-  for (; i < fft_size; ++i) {
-    fft_input[i] = 0;
-  }
-
-  // Apply the FFT.
-  kissfft_fixed16::kiss_fftr(
-      reinterpret_cast<kissfft_fixed16::kiss_fftr_cfg>(state->scratch),
-      state->input,
-      reinterpret_cast<kissfft_fixed16::kiss_fft_cpx*>(state->output));
-}
-
-void FftInit(struct FftState* state) {
-  // All the initialization is done in FftPopulateState()
-}
-
-void FftReset(struct FftState* state) {
-  memset(state->input, 0, state->fft_size * sizeof(*state->input));
-  memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
-}

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h

@@ -1,50 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct complex_int16_t {
-  int16_t real;
-  int16_t imag;
-};
-
-struct FftState {
-  int16_t* input;
-  struct complex_int16_t* output;
-  size_t fft_size;
-  size_t input_size;
-  void* scratch;
-  size_t scratch_size;
-};
-
-void FftCompute(struct FftState* state, const int16_t* input,
-                int input_scale_shift);
-
-void FftInit(struct FftState* state);
-
-void FftReset(struct FftState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_

+ 0 - 70
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc

@@ -1,70 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
-
-#include <stdio.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
-
-int FftPopulateState(struct FftState* state, size_t input_size) {
-  state->input_size = input_size;
-  state->fft_size = 1;
-  while (state->fft_size < state->input_size) {
-    state->fft_size <<= 1;
-  }
-
-  state->input = reinterpret_cast<int16_t*>(
-      malloc(state->fft_size * sizeof(*state->input)));
-  if (state->input == nullptr) {
-    fprintf(stderr, "Failed to alloc fft input buffer\n");
-    return 0;
-  }
-
-  state->output = reinterpret_cast<complex_int16_t*>(
-      malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
-  if (state->output == nullptr) {
-    fprintf(stderr, "Failed to alloc fft output buffer\n");
-    return 0;
-  }
-
-  // Ask kissfft how much memory it wants.
-  size_t scratch_size = 0;
-  kissfft_fixed16::kiss_fftr_cfg kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(
-      state->fft_size, 0, nullptr, &scratch_size);
-  if (kfft_cfg != nullptr) {
-    fprintf(stderr, "Kiss memory sizing failed.\n");
-    return 0;
-  }
-  state->scratch = malloc(scratch_size);
-  if (state->scratch == nullptr) {
-    fprintf(stderr, "Failed to alloc fft scratch buffer\n");
-    return 0;
-  }
-  state->scratch_size = scratch_size;
-  // Let kissfft configure the scratch space we just allocated
-  kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(state->fft_size, 0,
-                                              state->scratch, &scratch_size);
-  if (kfft_cfg != state->scratch) {
-    fprintf(stderr, "Kiss memory preallocation strategy failed.\n");
-    return 0;
-  }
-  return 1;
-}
-
-void FftFreeStateContents(struct FftState* state) {
-  free(state->input);
-  free(state->output);
-  free(state->scratch);
-}
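
Taken together, the removed fft.h and fft_util.cc describe a simple lifecycle: FftPopulateState() rounds the FFT length up to the next power of two and allocates the input, output, and scratch buffers; FftCompute() scales the samples and runs the real FFT into state.output; FftFreeStateContents() releases the buffers. A hedged sketch of that lifecycle, assuming the removed microfrontend headers are on the include path; the frame length of 320 samples is only an illustrative choice.

    #include <cstdint>

    #include "tensorflow/lite/experimental/microfrontend/lib/fft.h"       // removed by this commit
    #include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"  // removed by this commit

    int main() {
      struct FftState state;
      int16_t samples[320] = {0};  // hypothetical 20 ms frame at 16 kHz

      // Rounds the FFT size up to the next power of two (512 here) and
      // allocates input/output/scratch buffers; returns 0 on failure.
      if (!FftPopulateState(&state, 320)) {
        return 1;
      }
      FftInit(&state);

      // Shift the input left by 0 bits and run the real FFT; the complex
      // result is written to state.output (fft_size / 2 + 1 bins).
      FftCompute(&state, samples, /*input_scale_shift=*/0);

      FftFreeStateContents(&state);
      return 0;
    }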

+ 0 - 34
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h

@@ -1,34 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Prepares and FFT for the given input size.
-int FftPopulateState(struct FftState* state, size_t input_size);
-
-// Frees any allocated buffers.
-void FftFreeStateContents(struct FftState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_

+ 0 - 134
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c

@@ -1,134 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
-
-#include <string.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
-                                         struct complex_int16_t* fft_output,
-                                         int32_t* energy) {
-  const int end_index = state->end_index;
-  int i;
-  energy += state->start_index;
-  fft_output += state->start_index;
-  for (i = state->start_index; i < end_index; ++i) {
-    const int32_t real = fft_output->real;
-    const int32_t imag = fft_output->imag;
-    fft_output++;
-    const uint32_t mag_squared = (real * real) + (imag * imag);
-    *energy++ = mag_squared;
-  }
-}
-
-void FilterbankAccumulateChannels(struct FilterbankState* state,
-                                  const int32_t* energy) {
-  uint64_t* work = state->work;
-  uint64_t weight_accumulator = 0;
-  uint64_t unweight_accumulator = 0;
-
-  const int16_t* channel_frequency_starts = state->channel_frequency_starts;
-  const int16_t* channel_weight_starts = state->channel_weight_starts;
-  const int16_t* channel_widths = state->channel_widths;
-
-  int num_channels_plus_1 = state->num_channels + 1;
-  int i;
-  for (i = 0; i < num_channels_plus_1; ++i) {
-    const int32_t* magnitudes = energy + *channel_frequency_starts++;
-    const int16_t* weights = state->weights + *channel_weight_starts;
-    const int16_t* unweights = state->unweights + *channel_weight_starts++;
-    const int width = *channel_widths++;
-    int j;
-    for (j = 0; j < width; ++j) {
-      weight_accumulator += *weights++ * ((uint64_t)*magnitudes);
-      unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes);
-      ++magnitudes;
-    }
-    *work++ = weight_accumulator;
-    weight_accumulator = unweight_accumulator;
-    unweight_accumulator = 0;
-  }
-}
-
-static uint16_t Sqrt32(uint32_t num) {
-  if (num == 0) {
-    return 0;
-  }
-  uint32_t res = 0;
-  int max_bit_number = 32 - MostSignificantBit32(num);
-  max_bit_number |= 1;
-  uint32_t bit = 1U << (31 - max_bit_number);
-  int iterations = (31 - max_bit_number) / 2 + 1;
-  while (iterations--) {
-    if (num >= res + bit) {
-      num -= res + bit;
-      res = (res >> 1U) + bit;
-    } else {
-      res >>= 1U;
-    }
-    bit >>= 2U;
-  }
-  // Do rounding - if we have the bits.
-  if (num > res && res != 0xFFFF) {
-    ++res;
-  }
-  return res;
-}
-
-static uint32_t Sqrt64(uint64_t num) {
-  // Take a shortcut and just use 32 bit operations if the upper word is all
-  // clear. This will cause a slight off by one issue for numbers close to 2^32,
-  // but it probably isn't going to matter (and gives us a big performance win).
-  if ((num >> 32) == 0) {
-    return Sqrt32((uint32_t)num);
-  }
-  uint64_t res = 0;
-  int max_bit_number = 64 - MostSignificantBit64(num);
-  max_bit_number |= 1;
-  uint64_t bit = 1ULL << (63 - max_bit_number);
-  int iterations = (63 - max_bit_number) / 2 + 1;
-  while (iterations--) {
-    if (num >= res + bit) {
-      num -= res + bit;
-      res = (res >> 1U) + bit;
-    } else {
-      res >>= 1U;
-    }
-    bit >>= 2U;
-  }
-  // Do rounding - if we have the bits.
-  if (num > res && res != 0xFFFFFFFFLL) {
-    ++res;
-  }
-  return res;
-}
-
-uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
-  const int num_channels = state->num_channels;
-  const uint64_t* work = state->work + 1;
-  // Reuse the work buffer since we're fine clobbering it at this point to hold
-  // the output.
-  uint32_t* output = (uint32_t*)state->work;
-  int i;
-  for (i = 0; i < num_channels; ++i) {
-    *output++ = Sqrt64(*work++) >> scale_down_shift;
-  }
-  return (uint32_t*)state->work;
-}
-
-void FilterbankReset(struct FilterbankState* state) {
-  memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
-}
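
The accumulation loop above realizes triangular mel filters without storing them explicitly. Each FFT bin k sits between two adjacent mel centers and contributes w_k * E_k to the channel centered just below it and (1 - w_k) * E_k (the "unweight") to the channel centered just above, with both factors quantized to Q12 per kFilterbankBits. Carrying unweight_accumulator into the next channel's weight_accumulator therefore gives

  work[c] = sum over bins in band c-1 of (1 - w_k) * E_k  +  sum over bins in band c of w_k * E_k,

which is the complete triangle centered on channel c - 1. That is also why FilterbankSqrt reads from work + 1: work[0] only holds the half-filter anchored at the lower band limit.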

+ 0 - 63
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h

@@ -1,63 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-
-#define kFilterbankBits 12
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FilterbankState {
-  int num_channels;
-  int start_index;
-  int end_index;
-  int16_t* channel_frequency_starts;
-  int16_t* channel_weight_starts;
-  int16_t* channel_widths;
-  int16_t* weights;
-  int16_t* unweights;
-  uint64_t* work;
-};
-
-// Converts the relevant complex values of an FFT output into energy (the
-// square magnitude).
-void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
-                                         struct complex_int16_t* fft_output,
-                                         int32_t* energy);
-
-// Computes the mel-scale filterbank on the given energy array. Output is cached
-// internally - to fetch it, you need to call FilterbankSqrt.
-void FilterbankAccumulateChannels(struct FilterbankState* state,
-                                  const int32_t* energy);
-
-// Applies an integer square root to the 64 bit intermediate values of the
-// filterbank, and returns a pointer to them. Memory will be invalidated the
-// next time FilterbankAccumulateChannels is called.
-uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
-
-void FilterbankReset(struct FilterbankState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_

+ 0 - 220
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c

@@ -1,220 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-
-#define kFilterbankIndexAlignment 4
-#define kFilterbankChannelBlockSize 4
-
-void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
-  config->num_channels = 32;
-  config->lower_band_limit = 125.0f;
-  config->upper_band_limit = 7500.0f;
-  config->output_scale_shift = 7;
-}
-
-static float FreqToMel(float freq) { return 1127.0 * log1p(freq / 700.0); }
-
-static void CalculateCenterFrequencies(const int num_channels,
-                                       const float lower_frequency_limit,
-                                       const float upper_frequency_limit,
-                                       float* center_frequencies) {
-  assert(lower_frequency_limit >= 0.0f);
-  assert(upper_frequency_limit > lower_frequency_limit);
-
-  const float mel_low = FreqToMel(lower_frequency_limit);
-  const float mel_hi = FreqToMel(upper_frequency_limit);
-  const float mel_span = mel_hi - mel_low;
-  const float mel_spacing = mel_span / ((float)num_channels);
-  int i;
-  for (i = 0; i < num_channels; ++i) {
-    center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
-  }
-}
-
-static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
-                                      int16_t* unweight) {
-  *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
-  *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
-}
-
-int FilterbankPopulateState(const struct FilterbankConfig* config,
-                            struct FilterbankState* state, int sample_rate,
-                            int spectrum_size) {
-  state->num_channels = config->num_channels;
-  const int num_channels_plus_1 = config->num_channels + 1;
-
-  // How should we align things to index counts given the byte alignment?
-  const int index_alignment =
-      (kFilterbankIndexAlignment < sizeof(int16_t)
-           ? 1
-           : kFilterbankIndexAlignment / sizeof(int16_t));
-
-  state->channel_frequency_starts =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
-  state->channel_weight_starts =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
-  state->channel_widths =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
-  state->work = malloc(num_channels_plus_1 * sizeof(*state->work));
-
-  float* center_mel_freqs =
-      malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
-  int16_t* actual_channel_starts =
-      malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
-  int16_t* actual_channel_widths =
-      malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
-
-  if (state->channel_frequency_starts == NULL ||
-      state->channel_weight_starts == NULL || state->channel_widths == NULL ||
-      center_mel_freqs == NULL || actual_channel_starts == NULL ||
-      actual_channel_widths == NULL) {
-    free(center_mel_freqs);
-    free(actual_channel_starts);
-    free(actual_channel_widths);
-    fprintf(stderr, "Failed to allocate channel buffers\n");
-    return 0;
-  }
-
-  CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
-                             config->upper_band_limit, center_mel_freqs);
-
-  // Always exclude DC.
-  const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1);
-  state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
-  state->end_index = 0;  // Initialized to zero here, but actually set below.
-
-  // For each channel, we need to figure out what frequencies belong to it, and
-  // how much padding we need to add so that we can efficiently multiply the
-  // weights and unweights for accumulation. To simplify the multiplication
-  // logic, all channels will have some multiplication to do (even if there are
-  // no frequencies that accumulate to that channel) - they will be directed to
-  // a set of zero weights.
-  int chan_freq_index_start = state->start_index;
-  int weight_index_start = 0;
-  int needs_zeros = 0;
-
-  int chan;
-  for (chan = 0; chan < num_channels_plus_1; ++chan) {
-    // Keep jumping frequencies until we overshoot the bound on this channel.
-    int freq_index = chan_freq_index_start;
-    while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) {
-      ++freq_index;
-    }
-
-    const int width = freq_index - chan_freq_index_start;
-    actual_channel_starts[chan] = chan_freq_index_start;
-    actual_channel_widths[chan] = width;
-
-    if (width == 0) {
-      // This channel doesn't actually get anything from the frequencies, it's
-      // always zero. We need then to insert some 'zero' weights into the
-      // output, and just redirect this channel to do a single multiplication at
-      // this point. For simplicity, the zeros are placed at the beginning of
-      // the weights arrays, so we have to go and update all the other
-      // weight_starts to reflect this shift (but only once).
-      state->channel_frequency_starts[chan] = 0;
-      state->channel_weight_starts[chan] = 0;
-      state->channel_widths[chan] = kFilterbankChannelBlockSize;
-      if (!needs_zeros) {
-        needs_zeros = 1;
-        int j;
-        for (j = 0; j < chan; ++j) {
-          state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
-        }
-        weight_index_start += kFilterbankChannelBlockSize;
-      }
-    } else {
-      // How far back do we need to go to ensure that we have the proper
-      // alignment?
-      const int aligned_start =
-          (chan_freq_index_start / index_alignment) * index_alignment;
-      const int aligned_width = (chan_freq_index_start - aligned_start + width);
-      const int padded_width =
-          (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
-          kFilterbankChannelBlockSize;
-
-      state->channel_frequency_starts[chan] = aligned_start;
-      state->channel_weight_starts[chan] = weight_index_start;
-      state->channel_widths[chan] = padded_width;
-      weight_index_start += padded_width;
-    }
-    chan_freq_index_start = freq_index;
-  }
-
-  // Allocate the two arrays to store the weights - weight_index_start contains
-  // the index of what would be the next set of weights that we would need to
-  // add, so that's how many weights we need to allocate.
-  state->weights = calloc(weight_index_start, sizeof(*state->weights));
-  state->unweights = calloc(weight_index_start, sizeof(*state->unweights));
-
-  // If the alloc failed, we also need to nuke the arrays.
-  if (state->weights == NULL || state->unweights == NULL) {
-    free(center_mel_freqs);
-    free(actual_channel_starts);
-    free(actual_channel_widths);
-    fprintf(stderr, "Failed to allocate weights or unweights\n");
-    return 0;
-  }
-
-  // Next pass, compute all the weights. Since everything has been memset to
-  // zero, we only need to fill in the weights that correspond to some frequency
-  // for a channel.
-  const float mel_low = FreqToMel(config->lower_band_limit);
-  for (chan = 0; chan < num_channels_plus_1; ++chan) {
-    int frequency = actual_channel_starts[chan];
-    const int num_frequencies = actual_channel_widths[chan];
-    const int frequency_offset =
-        frequency - state->channel_frequency_starts[chan];
-    const int weight_start = state->channel_weight_starts[chan];
-    const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];
-
-    int j;
-    for (j = 0; j < num_frequencies; ++j, ++frequency) {
-      const float weight =
-          (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
-          (center_mel_freqs[chan] - denom_val);
-
-      // Make the float into an integer for the weights (and unweights).
-      const int weight_index = weight_start + frequency_offset + j;
-      QuantizeFilterbankWeights(weight, state->weights + weight_index,
-                                state->unweights + weight_index);
-    }
-    if (frequency > state->end_index) {
-      state->end_index = frequency;
-    }
-  }
-
-  free(center_mel_freqs);
-  free(actual_channel_starts);
-  free(actual_channel_widths);
-  if (state->end_index >= spectrum_size) {
-    fprintf(stderr, "Filterbank end_index is above spectrum size.\n");
-    return 0;
-  }
-  return 1;
-}
-
-void FilterbankFreeStateContents(struct FilterbankState* state) {
-  free(state->channel_frequency_starts);
-  free(state->channel_weight_starts);
-  free(state->channel_widths);
-  free(state->weights);
-  free(state->unweights);
-  free(state->work);
-}
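
FreqToMel above implements the usual mel approximation m(f) = 1127 * ln(1 + f/700), and CalculateCenterFrequencies spaces the num_channels + 1 centers evenly on that mel axis between the band limits. With the defaults (125 Hz to 7500 Hz, 32 channels) that works out to roughly m(125) ≈ 185 mel and m(7500) ≈ 2773 mel, i.e. about 81 mel between adjacent centers; the figures are approximate and only meant to illustrate the spacing.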

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h

@@ -1,50 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FilterbankConfig {
-  // number of frequency channel buckets for filterbank
-  int num_channels;
-  // maximum frequency to include
-  float upper_band_limit;
-  // minimum frequency to include
-  float lower_band_limit;
-  // unused
-  int output_scale_shift;
-};
-
-// Fills the FilterbankConfig with "sane" defaults.
-void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);
-
-// Allocates any buffers.
-int FilterbankPopulateState(const struct FilterbankConfig* config,
-                            struct FilterbankState* state, int sample_rate,
-                            int spectrum_size);
-
-// Frees any allocated buffers.
-void FilterbankFreeStateContents(struct FilterbankState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_

+ 0 - 72
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c

@@ -1,72 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
-                                             const int16_t* samples,
-                                             size_t num_samples,
-                                             size_t* num_samples_read) {
-  struct FrontendOutput output;
-  output.values = NULL;
-  output.size = 0;
-
-  // Try to apply the window - if it fails, return and wait for more data.
-  if (!WindowProcessSamples(&state->window, samples, num_samples,
-                            num_samples_read)) {
-    return output;
-  }
-
-  // Apply the FFT to the window's output (and scale it so that the fixed point
-  // FFT can have as much resolution as possible).
-  int input_shift =
-      15 - MostSignificantBit32(state->window.max_abs_output_value);
-  FftCompute(&state->fft, state->window.output, input_shift);
-
-  // We can re-use the fft's output buffer to hold the energy.
-  int32_t* energy = (int32_t*)state->fft.output;
-
-  FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
-                                      energy);
-
-  FilterbankAccumulateChannels(&state->filterbank, energy);
-  uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);
-
-  // Apply noise reduction.
-  NoiseReductionApply(&state->noise_reduction, scaled_filterbank);
-
-  if (state->pcan_gain_control.enable_pcan) {
-    PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
-  }
-
-  // Apply the log and scale.
-  int correction_bits =
-      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
-  uint16_t* logged_filterbank =
-      LogScaleApply(&state->log_scale, scaled_filterbank,
-                    state->filterbank.num_channels, correction_bits);
-
-  output.size = state->filterbank.num_channels;
-  output.values = logged_filterbank;
-  return output;
-}
-
-void FrontendReset(struct FrontendState* state) {
-  WindowReset(&state->window);
-  FftReset(&state->fft);
-  FilterbankReset(&state->filterbank);
-  NoiseReductionReset(&state->noise_reduction);
-}

+ 0 - 64
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h

@@ -1,64 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FrontendState {
-  struct WindowState window;
-  struct FftState fft;
-  struct FilterbankState filterbank;
-  struct NoiseReductionState noise_reduction;
-  struct PcanGainControlState pcan_gain_control;
-  struct LogScaleState log_scale;
-};
-
-struct FrontendOutput {
-  const uint16_t* values;
-  size_t size;
-};
-
-// Main entry point to processing frontend samples. Updates num_samples_read to
-// contain the number of samples that have been consumed from the input array.
-// Returns a struct containing the generated output. If not enough samples were
-// added to generate a feature vector, the returned size will be 0 and the
-// values pointer will be NULL. Note that the output pointer will be invalidated
-// as soon as FrontendProcessSamples is called again, so copy the contents
-// elsewhere if you need to use them later.
-struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
-                                             const int16_t* samples,
-                                             size_t num_samples,
-                                             size_t* num_samples_read);
-
-void FrontendReset(struct FrontendState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
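
The block comment above spells out the streaming contract of FrontendProcessSamples; a consumer loop would look roughly like the sketch below, where ProcessFeatureVector is a hypothetical callback and the surrounding state setup is assumed to exist (see the frontend_util.h sketch further below):

#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"

/* Streaming sketch: feed a PCM buffer through the frontend in chunks. */
static void FeedAudio(struct FrontendState* state,
                      const int16_t* audio, size_t audio_size) {
  const int16_t* cursor = audio;
  size_t remaining = audio_size;
  while (remaining > 0) {
    size_t consumed = 0;
    struct FrontendOutput out =
        FrontendProcessSamples(state, cursor, remaining, &consumed);
    cursor += consumed;
    remaining -= consumed;
    if (out.size > 0) {
      /* out.values only lives until the next call: use or copy it here. */
      ProcessFeatureVector(out.values, out.size);  /* hypothetical consumer */
    }
  }
}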

+ 0 - 85
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c

@@ -1,85 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
-  WindowFillConfigWithDefaults(&config->window);
-  FilterbankFillConfigWithDefaults(&config->filterbank);
-  NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
-  PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
-  LogScaleFillConfigWithDefaults(&config->log_scale);
-}
-
-int FrontendPopulateState(const struct FrontendConfig* config,
-                          struct FrontendState* state, int sample_rate) {
-  memset(state, 0, sizeof(*state));
-
-  if (!WindowPopulateState(&config->window, &state->window, sample_rate)) {
-    fprintf(stderr, "Failed to populate window state\n");
-    return 0;
-  }
-
-  if (!FftPopulateState(&state->fft, state->window.size)) {
-    fprintf(stderr, "Failed to populate fft state\n");
-    return 0;
-  }
-  FftInit(&state->fft);
-
-  if (!FilterbankPopulateState(&config->filterbank, &state->filterbank,
-                               sample_rate, state->fft.fft_size / 2 + 1)) {
-    fprintf(stderr, "Failed to populate filterbank state\n");
-    return 0;
-  }
-
-  if (!NoiseReductionPopulateState(&config->noise_reduction,
-                                   &state->noise_reduction,
-                                   state->filterbank.num_channels)) {
-    fprintf(stderr, "Failed to populate noise reduction state\n");
-    return 0;
-  }
-
-  int input_correction_bits =
-      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
-  if (!PcanGainControlPopulateState(
-          &config->pcan_gain_control, &state->pcan_gain_control,
-          state->noise_reduction.estimate, state->filterbank.num_channels,
-          state->noise_reduction.smoothing_bits, input_correction_bits)) {
-    fprintf(stderr, "Failed to populate pcan gain control state\n");
-    return 0;
-  }
-
-  if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) {
-    fprintf(stderr, "Failed to populate log scale state\n");
-    return 0;
-  }
-
-  FrontendReset(state);
-
-  // All good, return a true value.
-  return 1;
-}
-
-void FrontendFreeStateContents(struct FrontendState* state) {
-  WindowFreeStateContents(&state->window);
-  FftFreeStateContents(&state->fft);
-  FilterbankFreeStateContents(&state->filterbank);
-  NoiseReductionFreeStateContents(&state->noise_reduction);
-  PcanGainControlFreeStateContents(&state->pcan_gain_control);
-}

+ 0 - 52
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h

@@ -1,52 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FrontendConfig {
-  struct WindowConfig window;
-  struct FilterbankConfig filterbank;
-  struct NoiseReductionConfig noise_reduction;
-  struct PcanGainControlConfig pcan_gain_control;
-  struct LogScaleConfig log_scale;
-};
-
-// Fills the FrontendConfig with "sane" defaults.
-void FrontendFillConfigWithDefaults(struct FrontendConfig* config);
-
-// Allocates any buffers.
-int FrontendPopulateState(const struct FrontendConfig* config,
-                          struct FrontendState* state, int sample_rate);
-
-// Frees any allocated buffers.
-void FrontendFreeStateContents(struct FrontendState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
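
For completeness, the setup implied by this header is fill-defaults, optionally tweak, populate, and later free. A minimal sketch, in which the 16 kHz sample rate and the 40-channel override are illustrative values rather than anything taken from this repository:

#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"

/* Returns non-zero on success; FrontendPopulateState prints its own
   diagnostic to stderr on failure. */
static int SetUpFrontend(struct FrontendState* state) {
  struct FrontendConfig config;
  FrontendFillConfigWithDefaults(&config);
  config.filterbank.num_channels = 40;  /* example override of a default */
  return FrontendPopulateState(&config, state, 16000);  /* 16 kHz assumed */
}
/* ...drive FrontendProcessSamples in a loop, then call
   FrontendFreeStateContents(state) when done. */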

+ 0 - 48
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h

@@ -1,48 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
-
-// This header file should be included in all variants of kiss_fft_$type.{h,cc}
-// so that their sub-included source files do not mistakenly wrap libc header
-// files within their kissfft_$type namespaces.
-// E.g., this header avoids kissfft_int16.h containing:
-//   namespace kiss_fft_int16 {
-//     #include "kiss_fft.h"
-//   }
-// where kiss_fft.h contains:
-//   #include <math.h>
-//
-// TRICK: By including the following header files here, their preprocessor
-// header guards prevent them being re-defined inside of the kiss_fft_$type
-// namespaces declared within the kiss_fft_$type.{h,cc} sources.
-// Note that the original kiss_fft*.h files are untouched since they
-// may be used in libraries that include them directly.
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef FIXED_POINT
-#include <sys/types.h>
-#endif
-
-#ifdef USE_SIMD
-#include <xmmintrin.h>
-#endif
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_

+ 0 - 33
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h

@@ -1,33 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h"
-
-// Wrap 16-bit kiss fft in its own namespace. Enables us to link an application
-// with different kiss fft resolutions (16/32 bit integer, float, double)
-// without getting a linker error.
-#define FIXED_POINT 16
-namespace kissfft_fixed16 {
-#include "kiss_fft.h"
-#include "tools/kiss_fftr.h"
-}  // namespace kissfft_fixed16
-#undef FIXED_POINT
-#undef kiss_fft_scalar
-#undef KISS_FFT_H
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_

+ 0 - 30
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c

@@ -1,30 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
-const uint16_t kLogLut[]
-#ifndef _MSC_VER
-    __attribute__((aligned(4)))
-#endif  // _MSC_VER
-    = {0,    224,  442,  654,  861,  1063, 1259, 1450, 1636, 1817, 1992, 2163,
-       2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
-       3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
-       5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
-       5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
-       5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
-       5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
-       4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
-       3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
-       2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
-       1094, 963,  830,  695,  559,  421,  282,  142,  0,    0};

+ 0 - 40
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h

@@ -1,40 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Number of segments in the log lookup table. The table will be kLogSegments+1
-// in length (with some padding).
-#define kLogSegments 128
-#define kLogSegmentsLog2 7
-
-// Scale used by lookup table.
-#define kLogScale 65536
-#define kLogScaleLog2 16
-#define kLogCoeff 45426
-
-extern const uint16_t kLogLut[];
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_

+ 0 - 83
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c

@@ -1,83 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
-
-#define kuint16max 0x0000FFFF
-
-// The following functions implement integer logarithms of various sizes. The
-// approximation is calculated according to the method described in
-//       www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
-//       publicaciones/SPL2007/Log10-spl07.pdf
-// It first calculates log2 of the input and then converts it to natural
-// logarithm.
-
-static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
-  // Part 1
-  int32_t frac = x - (1LL << log2x);
-  if (log2x < kLogScaleLog2) {
-    frac <<= kLogScaleLog2 - log2x;
-  } else {
-    frac >>= log2x - kLogScaleLog2;
-  }
-  // Part 2
-  const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
-  const uint32_t seg_unit =
-      (((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2;
-
-  const int32_t c0 = kLogLut[base_seg];
-  const int32_t c1 = kLogLut[base_seg + 1];
-  const int32_t seg_base = seg_unit * base_seg;
-  const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
-  return frac + c0 + rel_pos;
-}
-
-static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
-  const uint32_t integer = MostSignificantBit32(x) - 1;
-  const uint32_t fraction = Log2FractionPart(x, integer);
-  const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
-  const uint32_t round = kLogScale / 2;
-  const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2;
-  // Finally scale to our output scale
-  const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
-  return loge_scaled;
-}
-
-uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
-                        int signal_size, int correction_bits) {
-  const int scale_shift = state->scale_shift;
-  uint16_t* output = (uint16_t*)signal;
-  uint16_t* ret = output;
-  int i;
-  for (i = 0; i < signal_size; ++i) {
-    uint32_t value = *signal++;
-    if (state->enable_log) {
-      if (correction_bits < 0) {
-        value >>= -correction_bits;
-      } else {
-        value <<= correction_bits;
-      }
-      if (value > 1) {
-        value = Log(value, scale_shift);
-      } else {
-        value = 0;
-      }
-    }
-    *output++ = (value < kuint16max) ? value : kuint16max;
-  }
-  return ret;
-}
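
The constants from log_lut.h tie Log() together: kLogScale = 2^16 is the fixed-point scale of the log2 value, and kLogCoeff = 45426 ≈ ln(2) * 2^16 (0.693147 * 65536 ≈ 45426). Log() first assembles log2(x) in Q16 as (MostSignificantBit32(x) - 1) << 16 plus the LUT-interpolated fraction, then multiplies by kLogCoeff and shifts right by kLogScaleLog2 to convert it to a natural logarithm before the output scale_shift is applied.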

+ 0 - 39
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h

@@ -1,39 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct LogScaleState {
-  int enable_log;
-  int scale_shift;
-};
-
-// Applies a fixed point logarithm to the signal and converts it to 16 bit. Note
-// that the signal array will be modified.
-uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
-                        int signal_size, int correction_bits);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_

+ 0 - 27
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c

@@ -1,27 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
-
-void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
-  config->enable_log = 1;
-  config->scale_shift = 6;
-}
-
-int LogScalePopulateState(const struct LogScaleConfig* config,
-                          struct LogScaleState* state) {
-  state->enable_log = config->enable_log;
-  state->scale_shift = config->scale_shift;
-  return 1;
-}

+ 0 - 45
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h

@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct LogScaleConfig {
-  // set to false (0) to disable this module
-  int enable_log;
-  // scale results by 2^(scale_shift)
-  int scale_shift;
-};
-
-// Populates the LogScaleConfig with "sane" default values.
-void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);
-
-// Allocates any buffers.
-int LogScalePopulateState(const struct LogScaleConfig* config,
-                          struct LogScaleState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_

+ 0 - 51
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c

@@ -1,51 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
-
-#include <string.h>
-
-void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
-  int i;
-  for (i = 0; i < state->num_channels; ++i) {
-    const uint32_t smoothing =
-        ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
-    const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;
-
-    // Update the estimate of the noise.
-    const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
-    uint32_t estimate =
-        (((uint64_t)signal_scaled_up * smoothing) +
-         ((uint64_t)state->estimate[i] * one_minus_smoothing)) >>
-        kNoiseReductionBits;
-    state->estimate[i] = estimate;
-
-    // Make sure that we can't get a negative value for the signal - estimate.
-    if (estimate > signal_scaled_up) {
-      estimate = signal_scaled_up;
-    }
-
-    const uint32_t floor =
-        ((uint64_t)signal[i] * state->min_signal_remaining) >>
-        kNoiseReductionBits;
-    const uint32_t subtracted =
-        (signal_scaled_up - estimate) >> state->smoothing_bits;
-    const uint32_t output = subtracted > floor ? subtracted : floor;
-    signal[i] = output;
-  }
-}
-
-void NoiseReductionReset(struct NoiseReductionState* state) {
-  memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
-}

+ 0 - 46
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h

@@ -1,46 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
-
-#define kNoiseReductionBits 14
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct NoiseReductionState {
-  int smoothing_bits;
-  uint16_t even_smoothing;
-  uint16_t odd_smoothing;
-  uint16_t min_signal_remaining;
-  int num_channels;
-  uint32_t* estimate;
-};
-
-// Removes stationary noise from each channel of the signal using a low pass
-// filter.
-void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);
-
-void NoiseReductionReset(struct NoiseReductionState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
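
Written out, the per-channel update in NoiseReductionApply is an exponential moving average with smoothing coefficient s (even_smoothing or odd_smoothing, stored as Q14 fractions per kNoiseReductionBits), computed on the input scaled up by smoothing_bits:

  estimate <- s * x_scaled + (1 - s) * estimate,
  output   =  max((x_scaled - estimate) >> smoothing_bits, min_signal_remaining * x),

so at least a min_signal_remaining fraction of the original signal always passes through, and setting min_signal_remaining to 1.0 effectively disables the reduction.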

+ 0 - 45
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c

@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
-
-#include <stdio.h>
-
-void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
-  config->smoothing_bits = 10;
-  config->even_smoothing = 0.025;
-  config->odd_smoothing = 0.06;
-  config->min_signal_remaining = 0.05;
-}
-
-int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
-                                struct NoiseReductionState* state,
-                                int num_channels) {
-  state->smoothing_bits = config->smoothing_bits;
-  state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
-  state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
-  state->min_signal_remaining =
-      config->min_signal_remaining * (1 << kNoiseReductionBits);
-  state->num_channels = num_channels;
-  state->estimate = calloc(state->num_channels, sizeof(*state->estimate));
-  if (state->estimate == NULL) {
-    fprintf(stderr, "Failed to alloc estimate buffer\n");
-    return 0;
-  }
-  return 1;
-}
-
-void NoiseReductionFreeStateContents(struct NoiseReductionState* state) {
-  free(state->estimate);
-}

+ 0 - 50
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h

@@ -1,50 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct NoiseReductionConfig {
-  // scale the signal up by 2^(smoothing_bits) before reduction
-  int smoothing_bits;
-  // smoothing coefficient for even-numbered channels
-  float even_smoothing;
-  // smoothing coefficient for odd-numbered channels
-  float odd_smoothing;
-  // fraction of signal to preserve (1.0 disables this module)
-  float min_signal_remaining;
-};
-
-// Populates the NoiseReductionConfig with "sane" default values.
-void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);
-
-// Allocates any buffers.
-int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
-                                struct NoiseReductionState* state,
-                                int num_channels);
-
-// Frees any allocated buffers.
-void NoiseReductionFreeStateContents(struct NoiseReductionState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_

+ 0 - 56
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c

@@ -1,56 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
-
-#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
-
-int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
-  if (x <= 2) {
-    return lut[x];
-  }
-
-  const int16_t interval = MostSignificantBit32(x);
-  lut += 4 * interval - 6;
-
-  const int16_t frac =
-      ((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
-      0x3FF;
-
-  int32_t result = ((int32_t)lut[2] * frac) >> 5;
-  result += (int32_t)((uint32_t)lut[1] << 5);
-  result *= frac;
-  result = (result + (1 << 14)) >> 15;
-  result += lut[0];
-  return (int16_t)result;
-}
-
-uint32_t PcanShrink(const uint32_t x) {
-  if (x < (2 << kPcanSnrBits)) {
-    return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
-  } else {
-    return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
-  }
-}
-
-void PcanGainControlApply(struct PcanGainControlState* state,
-                          uint32_t* signal) {
-  int i;
-  for (i = 0; i < state->num_channels; ++i) {
-    const uint32_t gain =
-        WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
-    const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift;
-    signal[i] = PcanShrink(snr);
-  }
-}

+ 0 - 47
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h

@@ -1,47 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#define kPcanSnrBits 12
-#define kPcanOutputBits 6
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Details at https://research.google/pubs/pub45911.pdf
-struct PcanGainControlState {
-  int enable_pcan;
-  uint32_t* noise_estimate;
-  int num_channels;
-  int16_t* gain_lut;
-  int32_t snr_shift;
-};
-
-int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);
-
-uint32_t PcanShrink(const uint32_t x);
-
-void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_

+ 0 - 92
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c

@@ -1,92 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#define kint16max 0x00007FFF
-
-void PcanGainControlFillConfigWithDefaults(
-    struct PcanGainControlConfig* config) {
-  config->enable_pcan = 0;
-  config->strength = 0.95;
-  config->offset = 80.0;
-  config->gain_bits = 21;
-}
-
-int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
-                               int32_t input_bits, uint32_t x) {
-  const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits);
-  const float gain_as_float =
-      ((uint32_t)1 << config->gain_bits) *
-      powf(x_as_float + config->offset, -config->strength);
-
-  if (gain_as_float > kint16max) {
-    return kint16max;
-  }
-  return (int16_t)(gain_as_float + 0.5f);
-}
-
-int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
-                                 struct PcanGainControlState* state,
-                                 uint32_t* noise_estimate,
-                                 const int num_channels,
-                                 const uint16_t smoothing_bits,
-                                 const int32_t input_correction_bits) {
-  state->enable_pcan = config->enable_pcan;
-  if (!state->enable_pcan) {
-    return 1;
-  }
-  state->noise_estimate = noise_estimate;
-  state->num_channels = num_channels;
-  state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
-  if (state->gain_lut == NULL) {
-    fprintf(stderr, "Failed to allocate gain LUT\n");
-    return 0;
-  }
-  state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;
-
-  const int32_t input_bits = smoothing_bits - input_correction_bits;
-  state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
-  state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
-  state->gain_lut -= 6;
-  int interval;
-  for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
-    const uint32_t x0 = (uint32_t)1 << (interval - 1);
-    const uint32_t x1 = x0 + (x0 >> 1);
-    const uint32_t x2 =
-        (interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;
-
-    const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
-    const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
-    const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
-
-    const int32_t diff1 = (int32_t)y1 - y0;
-    const int32_t diff2 = (int32_t)y2 - y0;
-    const int32_t a1 = 4 * diff1 - diff2;
-    const int32_t a2 = diff2 - a1;
-
-    state->gain_lut[4 * interval] = y0;
-    state->gain_lut[4 * interval + 1] = (int16_t)a1;
-    state->gain_lut[4 * interval + 2] = (int16_t)a2;
-  }
-  state->gain_lut += 6;
-  return 1;
-}
-
-void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
-  free(state->gain_lut);
-}
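
The LUT built by PcanGainControlPopulateState above samples the gain curve gain(x) ~= (1 << gain_bits) * (x_scaled + offset)^(-strength) once per power-of-two interval of its input and stores three values per interval: the gain at the interval start plus two coefficients of a quadratic fit through the start, midpoint and end samples. A minimal float-domain sketch of what those three values encode (the removed code evaluates the same fit in fixed point inside WideDynamicFunction; the function name below is illustrative):

    #include <stdint.h>

    /* Sketch only: reconstruct the gain for an input x that falls in interval i,
     * where interval i covers [2^(i-1), 2^i) and `entry` points at the
     * (y0, a1, a2) triple stored for that interval. */
    static float PcanGainFromTriple(const int16_t* entry, uint32_t x, int interval) {
      const float x0 = (float)(1u << (interval - 1));
      const float t = ((float)x - x0) / x0;   /* 0 at interval start, 1 at end   */
      const float y0 = entry[0];              /* gain at x0                      */
      const float a1 = entry[1];              /* 4*(y_mid - y0) - (y_end - y0)   */
      const float a2 = entry[2];              /* (y_end - y0) - a1               */
      return y0 + a1 * t + a2 * t * t;        /* hits y_mid at t=0.5, y_end at 1 */
    }

With the defaults filled in above (strength 0.95, offset 80.0, gain_bits 21), this is a mildly compressive gain that falls off slightly more slowly than 1/x.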

+ 0 - 57
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h

@@ -1,57 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
-
-#define kWideDynamicFunctionBits 32
-#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct PcanGainControlConfig {
-  // set to false (0) to disable this module
-  int enable_pcan;
-  // gain normalization exponent (0.0 disables, 1.0 full strength)
-  float strength;
-  // positive value added in the normalization denominator
-  float offset;
-  // number of fractional bits in the gain
-  int gain_bits;
-};
-
-void PcanGainControlFillConfigWithDefaults(
-    struct PcanGainControlConfig* config);
-
-int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
-                               int32_t input_bits, uint32_t x);
-
-int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
-                                 struct PcanGainControlState* state,
-                                 uint32_t* noise_estimate,
-                                 const int num_channels,
-                                 const uint16_t smoothing_bits,
-                                 const int32_t input_correction_bits);
-
-void PcanGainControlFreeStateContents(struct PcanGainControlState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_

+ 0 - 70
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c

@@ -1,70 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
-
-#include <string.h>
-
-int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
-                         size_t num_samples, size_t* num_samples_read) {
-  const int size = state->size;
-
-  // Copy samples from the samples buffer over to our local input.
-  size_t max_samples_to_copy = state->size - state->input_used;
-  if (max_samples_to_copy > num_samples) {
-    max_samples_to_copy = num_samples;
-  }
-  memcpy(state->input + state->input_used, samples,
-         max_samples_to_copy * sizeof(*samples));
-  *num_samples_read = max_samples_to_copy;
-  state->input_used += max_samples_to_copy;
-
-  if (state->input_used < state->size) {
-    // We don't have enough samples to compute a window.
-    return 0;
-  }
-
-  // Apply the window to the input.
-  const int16_t* coefficients = state->coefficients;
-  const int16_t* input = state->input;
-  int16_t* output = state->output;
-  int i;
-  int16_t max_abs_output_value = 0;
-  for (i = 0; i < size; ++i) {
-    int16_t new_value =
-        (((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits;
-    *output++ = new_value;
-    if (new_value < 0) {
-      new_value = -new_value;
-    }
-    if (new_value > max_abs_output_value) {
-      max_abs_output_value = new_value;
-    }
-  }
-  // Shuffle the input down by the step size, and update how much we have used.
-  memmove(state->input, state->input + state->step,
-          sizeof(*state->input) * (state->size - state->step));
-  state->input_used -= state->step;
-  state->max_abs_output_value = max_abs_output_value;
-
-  // Indicate that the output buffer is valid for the next stage.
-  return 1;
-}
-
-void WindowReset(struct WindowState* state) {
-  memset(state->input, 0, state->size * sizeof(*state->input));
-  memset(state->output, 0, state->size * sizeof(*state->output));
-  state->input_used = 0;
-  state->max_abs_output_value = 0;
-}

+ 0 - 49
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h

@@ -1,49 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#define kFrontendWindowBits 12
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct WindowState {
-  size_t size;
-  int16_t* coefficients;
-  size_t step;
-
-  int16_t* input;
-  size_t input_used;
-  int16_t* output;
-  int16_t max_abs_output_value;
-};
-
-// Applies a window to the samples coming in, stepping forward at the given
-// rate.
-int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
-                         size_t num_samples, size_t* num_samples_read);
-
-void WindowReset(struct WindowState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
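
WindowProcessSamples above is a streaming call: it copies as many input samples as currently fit, reports how many it consumed through num_samples_read, and returns 1 once state->output holds a full windowed frame (the input buffer is then shifted down by step, so successive frames overlap). A minimal caller sketch against the deleted header; the FeedAudio name and the FFT hand-off comment are illustrative, not part of the removed code:

    #include "tensorflow/lite/experimental/microfrontend/lib/window.h"

    /* Sketch only: push an arbitrary chunk of audio through the windower. */
    static void FeedAudio(struct WindowState* state, const int16_t* audio,
                          size_t count) {
      while (count > 0) {
        size_t read = 0;
        const int frame_ready = WindowProcessSamples(state, audio, count, &read);
        audio += read;
        count -= read;
        if (frame_ready) {
          /* state->output now holds state->size windowed samples; the frontend
           * would hand them to the next stage (the FFT) before feeding more. */
        }
      }
    }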

+ 0 - 73
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c

@@ -1,73 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-// Some platforms don't have M_PI
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-void WindowFillConfigWithDefaults(struct WindowConfig* config) {
-  config->size_ms = 25;
-  config->step_size_ms = 10;
-}
-
-int WindowPopulateState(const struct WindowConfig* config,
-                        struct WindowState* state, int sample_rate) {
-  state->size = config->size_ms * sample_rate / 1000;
-  state->step = config->step_size_ms * sample_rate / 1000;
-
-  state->coefficients = malloc(state->size * sizeof(*state->coefficients));
-  if (state->coefficients == NULL) {
-    fprintf(stderr, "Failed to allocate window coefficients\n");
-    return 0;
-  }
-
-  // Populate the window values.
-  const float arg = M_PI * 2.0 / ((float)state->size);
-  int i;
-  for (i = 0; i < state->size; ++i) {
-    float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
-    // Scale it to fixed point and round it.
-    state->coefficients[i] =
-        floor(float_value * (1 << kFrontendWindowBits) + 0.5);
-  }
-
-  state->input_used = 0;
-  state->input = malloc(state->size * sizeof(*state->input));
-  if (state->input == NULL) {
-    fprintf(stderr, "Failed to allocate window input\n");
-    return 0;
-  }
-
-  state->output = malloc(state->size * sizeof(*state->output));
-  if (state->output == NULL) {
-    fprintf(stderr, "Failed to allocate window output\n");
-    return 0;
-  }
-
-  return 1;
-}
-
-void WindowFreeStateContents(struct WindowState* state) {
-  free(state->coefficients);
-  free(state->input);
-  free(state->output);
-}
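
Reading WindowPopulateState above concretely: the frame and step lengths are just size_ms * sample_rate / 1000, and the coefficients are a Hann window scaled into Q12 fixed point (kFrontendWindowBits is 12). A minimal sketch with the default config at a 16 kHz sample rate; the rate is only an example, nothing in the removed code defaults to it:

    #include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"

    int main(void) {
      struct WindowConfig config;
      struct WindowState state;
      WindowFillConfigWithDefaults(&config);        /* 25 ms frame, 10 ms step */
      if (!WindowPopulateState(&config, &state, 16000)) return 1;
      /* state.size == 25 * 16000 / 1000 == 400 samples,
       * state.step == 10 * 16000 / 1000 == 160 samples,
       * state.coefficients[i] ==
       *     floor((0.5 - 0.5 * cos(2 * pi * (i + 0.5) / 400)) * 4096 + 0.5). */
      WindowFreeStateContents(&state);
      return 0;
    }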

+ 0 - 45
code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h

@@ -1,45 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
-#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
-
-#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct WindowConfig {
-  // length of window frame in milliseconds
-  size_t size_ms;
-  // length of step for next frame in milliseconds
-  size_t step_size_ms;
-};
-
-// Populates the WindowConfig with "sane" default values.
-void WindowFillConfigWithDefaults(struct WindowConfig* config);
-
-// Allocates any buffers.
-int WindowPopulateState(const struct WindowConfig* config,
-                        struct WindowState* state, int sample_rate);
-
-// Frees any allocated buffers.
-void WindowFreeStateContents(struct WindowState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_

+ 0 - 1180
code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h

@@ -1,1180 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-
-#include <algorithm>
-#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#endif
-#endif
-
-#include <functional>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-constexpr int kReverseShift = -1;
-
-inline void GetActivationMinMax(FusedActivationFunctionType ac,
-                                float* output_activation_min,
-                                float* output_activation_max) {
-  switch (ac) {
-    case FusedActivationFunctionType::kNone:
-      *output_activation_min = std::numeric_limits<float>::lowest();
-      *output_activation_max = std::numeric_limits<float>::max();
-      break;
-    case FusedActivationFunctionType::kRelu:
-      *output_activation_min = 0.f;
-      *output_activation_max = std::numeric_limits<float>::max();
-      break;
-    case FusedActivationFunctionType::kRelu1:
-      *output_activation_min = -1.f;
-      *output_activation_max = 1.f;
-      break;
-    case FusedActivationFunctionType::kRelu6:
-      *output_activation_min = 0.f;
-      *output_activation_max = 6.f;
-      break;
-  }
-}
-
-template <typename T>
-inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
-                                      T output_activation_max) {
-  using std::max;
-  using std::min;
-  return min(max(x, output_activation_min), output_activation_max);
-}
-
-// Legacy function, left for compatibility only.
-template <FusedActivationFunctionType Ac>
-float ActivationFunction(float x) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  return ActivationFunctionWithMinMax(x, output_activation_min,
-                                      output_activation_max);
-}
-
-inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
-                         const float* bias_data, int array_size,
-                         float* array_data) {
-  if (bias_size == 0) return;
-  // Note: see b/132215220: in May 2019 we thought it would be OK to replace
-  // this with the Eigen one-liner:
-  //   return (array.colwise() + bias).cwiseMax(clamp_min).cwiseMin(clamp_max).
-  // This turned out to severely regress performance: +4ms (i.e. 8%) on
-  // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
-  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
-#ifdef USE_NEON
-  float* array_ptr = array_data;
-  float* array_end_ptr = array_ptr + array_size;
-  const auto clamp_min_vec = vdupq_n_f32(clamp_min);
-  const auto clamp_max_vec = vdupq_n_f32(clamp_max);
-  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
-    int i = 0;
-    for (; i <= bias_size - 16; i += 16) {
-      auto b0 = vld1q_f32(bias_data + i);
-      auto b1 = vld1q_f32(bias_data + i + 4);
-      auto b2 = vld1q_f32(bias_data + i + 8);
-      auto b3 = vld1q_f32(bias_data + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vaddq_f32(a0, b0);
-      auto x1 = vaddq_f32(a1, b1);
-      auto x2 = vaddq_f32(a2, b2);
-      auto x3 = vaddq_f32(a3, b3);
-      x0 = vmaxq_f32(clamp_min_vec, x0);
-      x1 = vmaxq_f32(clamp_min_vec, x1);
-      x2 = vmaxq_f32(clamp_min_vec, x2);
-      x3 = vmaxq_f32(clamp_min_vec, x3);
-      x0 = vminq_f32(clamp_max_vec, x0);
-      x1 = vminq_f32(clamp_max_vec, x1);
-      x2 = vminq_f32(clamp_max_vec, x2);
-      x3 = vminq_f32(clamp_max_vec, x3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= bias_size - 4; i += 4) {
-      auto b = vld1q_f32(bias_data + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vaddq_f32(a, b);
-      x = vmaxq_f32(clamp_min_vec, x);
-      x = vminq_f32(clamp_max_vec, x);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < bias_size; i++) {
-      array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
-                                                  clamp_min, clamp_max);
-    }
-  }
-#else  // not NEON
-  for (int array_offset = 0; array_offset < array_size;
-       array_offset += bias_size) {
-    for (int i = 0; i < bias_size; i++) {
-      array_data[array_offset + i] = ActivationFunctionWithMinMax(
-          array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
-    }
-  }
-#endif
-}
-
-// Single-rounding MultiplyByQuantizedMultiplier
-#if TFLITE_SINGLE_ROUNDING
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-  TFLITE_DCHECK(shift >= -31 && shift <= 30);
-
-  const int64_t total_shift = 31 - shift;
-  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
-  int64_t result = x * static_cast<int64_t>(quantized_multiplier) + round;
-  result = result >> total_shift;
-
-  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
-                result <= std::numeric_limits<int32_t>::max());
-  return static_cast<int32_t>(result);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK_LE(shift, 0);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK_GE(shift, 0);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0  (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-  TFLITE_DCHECK(shift >= -31 && shift < 8);
-  TFLITE_DCHECK(x >= -(static_cast<int64_t>(1) << 47) &&
-                x < (static_cast<int64_t>(1) << 47));
-
-  const int32_t reduced_multiplier =
-      (quantized_multiplier < 0x7FFF0000)
-          ? ((quantized_multiplier + (1 << 15)) >> 16)
-          : 0x7FFF;
-  const int64_t total_shift = 15 - shift;
-  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
-  int64_t result = x * static_cast<int64_t>(reduced_multiplier) + round;
-  result = result >> total_shift;
-
-  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
-                result <= std::numeric_limits<int32_t>::max());
-  return static_cast<int32_t>(result);
-}
-
-#ifdef USE_NEON
-inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
-    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-
-  const int right_shift = std::min(-1, shift);
-  const int left_shift = shift - right_shift;
-
-  const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
-  const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
-  const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
-
-  int32x4x4_t result;
-  result.val[0] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[1] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[2] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[3] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  return result;
-}
-#endif  // USE_NEON
-// Double-rounding MultiplyByQuantizedMultiplier
-#else
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  return RoundingDivideByPOT(
-      SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
-                                           quantized_multiplier);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  int left_shift = shift > 0 ? shift : 0;
-  int right_shift = shift > 0 ? 0 : -shift;
-  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
-                                 x * (1 << left_shift), quantized_multiplier),
-                             right_shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0  (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  assert(quantized_multiplier >= 0);
-  assert(shift >= -31 && shift < 8);
-  assert(x >= -(static_cast<int64_t>(1) << 47) &&
-         x < (static_cast<int64_t>(1) << 47));
-
-  int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000)
-                                   ? ((quantized_multiplier + (1 << 15)) >> 16)
-                                   : 0x7FFF;
-  int total_shift = 15 - shift;
-  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
-  int32_t result = x >> total_shift;
-  return result;
-}
-
-#ifdef USE_NEON
-// Round uses ARM's rounding shift right.
-inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
-    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
-  const int left_shift = std::max(shift, 0);
-  const int right_shift = std::min(shift, 0);
-  int32x4x4_t result;
-
-  int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
-  int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
-  int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
-
-  result.val[0] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[1] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[2] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[3] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  return result;
-}
-#endif  // USE_NEON
-#endif  // TFLITE_SINGLE_ROUNDING
-
-template <typename T>
-int CountLeadingZeros(T integer_input) {
-  static_assert(std::is_unsigned<T>::value,
-                "Only unsigned integer types handled.");
-#if defined(__GNUC__)
-  return integer_input ? __builtin_clz(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
-  if (integer_input == 0) {
-    return std::numeric_limits<T>::digits;
-  }
-
-  const T one_in_leading_positive = static_cast<T>(1)
-                                    << (std::numeric_limits<T>::digits - 1);
-  int leading_zeros = 0;
-  while (integer_input < one_in_leading_positive) {
-    integer_input <<= 1;
-    ++leading_zeros;
-  }
-  return leading_zeros;
-#endif
-}
-
-template <typename T>
-inline int CountLeadingSignBits(T integer_input) {
-  static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
-#if defined(__GNUC__) && !defined(__clang__)
-  return integer_input ? __builtin_clrsb(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
-  using U = typename std::make_unsigned<T>::type;
-  return integer_input >= 0
-             ? CountLeadingZeros(static_cast<U>(integer_input)) - 1
-         : integer_input != std::numeric_limits<T>::min()
-             ? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
-             : 0;
-#endif
-}
-
-// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)).
-template <typename Integer>
-inline Integer FloorLog2(Integer n) {
-  static_assert(std::is_integral<Integer>::value, "");
-  static_assert(std::is_signed<Integer>::value, "");
-  static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
-  TFLITE_CHECK_GT(n, 0);
-  if (sizeof(Integer) == 4) {
-    return 30 - CountLeadingSignBits(n);
-  } else {
-    return 62 - CountLeadingSignBits(n);
-  }
-}
-
-// The size of the LUT depends on the type of input. For int8 inputs a simple
-// 256 entries LUT is used. For int16 inputs the high 9 bits are used for
-// indexing and the 7 remaining bits are used for interpolation. We thus use a
-// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry
-// to interpolate the last value.
-template <typename LutInT>
-constexpr int lut_size() {
-  static_assert(std::is_same<LutInT, int8_t>::value ||
-                    std::is_same<LutInT, int16_t>::value,
-                "Only LUTs with int8 or int16 inputs are supported.");
-  return std::is_same<LutInT, int8_t>::value ? 256 : 513;
-}
-
-// Generate a LUT for 'func' which can be used to approximate functions like
-// exp, log, ...
-//
-// - func: the function to build the LUT for (e.g exp(x))
-// - input_min, input_max: range of the func inputs
-// - output_min, output_max: range of the func outputs
-// - lut: pointer to the LUT table to fill, the table must be of size
-// lut_size<LutInT>()
-template <typename FloatT, typename LutInT, typename LutOutT>
-inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max,
-                    FloatT output_min, FloatT output_max, LutOutT* lut) {
-  static_assert(std::is_same<LutInT, int8_t>::value ||
-                    std::is_same<LutInT, int16_t>::value,
-                "Only LUTs with int8 or int16 inputs are supported.");
-  static_assert(std::is_same<LutOutT, int8_t>::value ||
-                    std::is_same<LutOutT, int16_t>::value,
-                "Only LUTs with int8 or int16 outputs are supported.");
-  static_assert(std::is_floating_point<FloatT>::value,
-                "FloatT must be a floating-point type.");
-
-  const int nb_steps = std::is_same<LutInT, int8_t>::value ? 256 : 512;
-  const FloatT step = (input_max - input_min) / nb_steps;
-  const FloatT half_step = step / 2;
-  const FloatT output_scaling_inv =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::max() -
-                          std::numeric_limits<LutOutT>::min() + 1) /
-      (output_max - output_min);
-  const FloatT table_min =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::min());
-  const FloatT table_max =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::max());
-
-  for (int i = 0; i < nb_steps; i++) {
-    const FloatT val = func(input_min + i * step);
-    const FloatT val_midpoint = func(input_min + i * step + half_step);
-    const FloatT val_next = func(input_min + (i + 1) * step);
-
-    const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
-    const FloatT midpoint_interp_val =
-        TfLiteRound((val_next * output_scaling_inv +
-                     TfLiteRound(val * output_scaling_inv)) /
-                    2);
-    const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv);
-    const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
-    const FloatT bias = TfLiteRound(midpoint_err / 2);
-
-    lut[i] = static_cast<LutOutT>(std::min<FloatT>(
-        std::max<FloatT>(sample_val - bias, table_min), table_max));
-  }
-
-  const bool with_extra_interpolation_value =
-      std::is_same<LutInT, int16_t>::value;
-  if (with_extra_interpolation_value) {
-    lut[nb_steps] = static_cast<LutOutT>(std::min<FloatT>(
-        std::max<FloatT>(TfLiteRound(func(input_max) * output_scaling_inv),
-                         table_min),
-        table_max));
-  }
-}
-
-// LUT must have 513 values
-template <typename LutOutT>
-inline LutOutT lut_lookup_with_interpolation(int16_t value,
-                                             const LutOutT* lut) {
-  static_assert(std::is_same<LutOutT, int8_t>::value ||
-                    std::is_same<LutOutT, int16_t>::value,
-                "Only LUTs with int8 or int16 outputs are supported.");
-  // 512 base values, lut[513] is only used to calculate the slope
-  const uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
-  assert(index < 512 && "LUT index out of range.");
-  const int16_t offset = value & 0x7f;
-
-  // Base and slope are Q0.x
-  const LutOutT base = lut[index];
-  const LutOutT slope = lut[index + 1] - lut[index];
-
-  // Q0.x * Q0.7 = Q0.(x + 7)
-  // Round and convert from Q0.(x + 7) to Q0.x
-  const int delta = (slope * offset + 64) >> 7;
-
-  // Q0.15 + Q0.15
-  return static_cast<LutOutT>(base + delta);
-}
-
-// int16_t -> int16_t table lookup with interpolation
-// LUT must have 513 values
-inline int16_t lut_lookup(int16_t value, const int16_t* lut) {
-  return lut_lookup_with_interpolation(value, lut);
-}
-
-// int16_t -> int8_t table lookup with interpolation
-// LUT must have 513 values
-inline int8_t lut_lookup(int16_t value, const int8_t* lut) {
-  return lut_lookup_with_interpolation(value, lut);
-}
-
-// int8_t -> int8_t table lookup without interpolation
-// LUT must have 256 values
-inline int8_t lut_lookup(int8_t value, const int8_t* lut) {
-  return lut[128 + value];
-}
-
-// int8_t -> int16_t table lookup without interpolation
-// LUT must have 256 values
-inline int16_t lut_lookup(int8_t value, const int16_t* lut) {
-  return lut[128 + value];
-}
-
-// Table of sigmoid(i/24) at 0.16 format - 256 elements.
-
-// We use combined sigmoid and tanh look-up table, since
-// tanh(x) = 2*sigmoid(2*x) -1.
-// Both functions are symmetric, so the LUT table is only needed
-// for the absolute value of the input.
-static const uint16_t sigmoid_table_uint16[256] = {
-    32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
-    40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
-    46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
-    52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
-    56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
-    59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
-    61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
-    62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
-    63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
-    64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
-    64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
-    65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
-    65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
-    65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
-    65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
-    65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
-    65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
-    65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
-    65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
-    65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
-    65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
-    65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
-    65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
-    65534, 65534, 65535};
-
-// TODO(b/77858996): Add these to gemmlowp.
-template <typename IntegerType>
-IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t sum = a64 + b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          sum)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingAddNonGemmlowp(a.raw(), b.raw()));
-}
-
-template <typename IntegerType>
-IntegerType SaturatingSub(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
-  std::int32_t a32 = a;
-  std::int32_t b32 = b;
-  std::int32_t diff = a32 - b32;
-  return static_cast<std::int16_t>(
-      std::min(static_cast<int32_t>(32767),
-               std::max(static_cast<int32_t>(-32768), diff)));
-}
-
-template <>
-inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t diff = a64 - b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          diff)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingSub(a.raw(), b.raw()));
-}
-// End section to be moved to gemmlowp.
-
-template <typename IntegerType>
-IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
-  if (exponent == 0) {
-    return x;
-  }
-  using ScalarIntegerType =
-      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
-  const IntegerType min =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
-  const IntegerType max =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
-  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
-
-  const std::int32_t threshold =
-      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
-  const IntegerType positive_mask =
-      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
-  const IntegerType negative_mask =
-      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
-
-  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
-  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
-  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
-  return result;
-}
-
-// If we want to leave IntegerBits fixed, then multiplication
-// by a power of two has to be saturating/rounding, not exact anymore.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits>
-SaturatingRoundingMultiplyByPOTParam(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
-}
-
-// Convert int32_t multiplier to int16_t with rounding.
-inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
-                                            int16_t* multiplier_int16_t) {
-  TFLITE_DCHECK_GE(multiplier_int32_t, 0);
-  static constexpr int32_t kRoundingOffset = 1 << 15;
-  if (multiplier_int32_t >=
-      std::numeric_limits<int32_t>::max() - kRoundingOffset) {
-    *multiplier_int16_t = std::numeric_limits<int16_t>::max();
-    return;
-  }
-  const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
-  TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
-  TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
-  *multiplier_int16_t = result;
-  TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
-}
-
-// Minimum output bits to accommodate log of maximum input range.  It actually
-// does not matter if one considers, say, [-64,64] or [-64,64).
-//
-// For example, run this through Octave:
-// [0:127; ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
-constexpr int min_log_x_output_bits(int input_bits) {
-  return input_bits > 90   ? 7
-         : input_bits > 44 ? 6
-         : input_bits > 21 ? 5
-         : input_bits > 10 ? 4
-         : input_bits > 4  ? 3
-         : input_bits > 1  ? 2
-                           : 1;
-}
-
-// Although currently the name of this function says that it cannot handle
-// values less than 1, in practice it can handle as low as 1/x_max, where
-// x_max is the largest representable input.  In other words, the output range
-// is symmetric.
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1_impl(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
-  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
-  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
-  // The reason for accumulating the result with an extra bit of headroom is
-  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
-  // recip_denom will otherwise introduce an error.
-  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
-
-  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1488522236, std::log(2.0));
-  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
-  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1518500250, std::sqrt(0.5));
-  const FixedPoint0 one_quarter =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
-
-  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1057819769,
-      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
-
-  const FixedPointAccum shifted_quarter =
-      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
-
-  // Reinterpret the input value as Q0.31, because we will figure out the
-  // required shift "ourselves" instead of using, say, Rescale.
-  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
-  // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
-  FixedPoint0 r_a_tmp =
-      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
-  const int32_t r_a_raw =
-      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
-  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
-  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
-  //                   InputIntegerBits - z_b_headroom - 0.25);
-  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          static_cast<int32_t>(InputIntegerBits - z_a_headroom_plus_1),
-          31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
-  FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
-  const int32_t r_b_raw =
-      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
-  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          static_cast<int32_t>(InputIntegerBits - z_b_headroom),
-          31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
-  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
-      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
-
-  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
-  FixedPoint0 q = r - sqrt_sqrt_half;
-  q = q + q;
-
-  const FixedPoint0 common_sq = q * q;
-  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
-  const FixedPoint0 denom_minus_one_0 =
-      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
-  const FixedPoint0 recip_denom =
-      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
-
-  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
-  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
-                                              num_scaled * recip_denom);
-}
-
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  static_assert(
-      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
-      "Output integer bits must be sufficient to accommodate logs of inputs.");
-  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
-                                                     InputIntegerBits>(
-      input_val);
-}
-
-inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
-                             int* num_bits_over_unit) {
-  int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
-  // This is the number of bits to the left of the binary point above 1.0.
-  // Consider x=1.25.  In that case shifted_scale=0.8 and
-  // no later adjustment will be needed.
-  *num_bits_over_unit = x_integer_digits - headroom_plus_one;
-  const int32_t shifted_sum_minus_one =
-      static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
-                           (static_cast<uint32_t>(1) << 31));
-
-  gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
-      gemmlowp::one_over_one_plus_x_for_x_in_0_1(
-          gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
-  return shifted_scale.raw();
-}
-
-inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
-                                             int32_t* output_inv_sqrt,
-                                             int* output_shift) {
-  TFLITE_DCHECK_GE(input, 0);
-  if (input <= 1) {
-    // Handle the input value 1 separately to avoid overflow in that case
-    // in the general computation below (b/143972021). Also handle 0 as if it
-    // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
-    // but rare/unrealistic input value. We can expect both to occur in some
-    // incompletely trained models, but probably not in fully trained models.
-    *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
-    *output_shift = 0;
-    return;
-  }
-  TFLITE_DCHECK_GT(input, 1);
-  *output_shift = 11;
-  while (input >= (1 << 29)) {
-    input /= 4;
-    ++*output_shift;
-  }
-  const unsigned max_left_shift_bits =
-      CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
-  const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
-  const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
-  *output_shift -= left_shift_bit_pairs;
-  input <<= 2 * left_shift_bit_pairs;
-  TFLITE_DCHECK_GE(input, (1 << 27));
-  TFLITE_DCHECK_LT(input, (1 << 29));
-  using gemmlowp::FixedPoint;
-  using gemmlowp::Rescale;
-  using gemmlowp::SaturatingRoundingMultiplyByPOT;
-  // Using 3 integer bits gives us enough room for the internal arithmetic in
-  // this Newton-Raphson iteration.
-  using F3 = FixedPoint<int32_t, 3>;
-  using F0 = FixedPoint<int32_t, 0>;
-  const F3 fixedpoint_input = F3::FromRaw(input >> 1);
-  const F3 fixedpoint_half_input =
-      SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
-  const F3 fixedpoint_half_three =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
-  // Newton-Raphson iteration
-  // Naive unoptimized starting guess: x = 1
-  F3 x = F3::One();
-  // Naive unoptimized number of iterations: 5
-  for (int i = 0; i < 5; i++) {
-    const F3 x3 = Rescale<3>(x * x * x);
-    x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
-  }
-  const F0 fixedpoint_half_sqrt_2 =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
-  x = x * fixedpoint_half_sqrt_2;
-  *output_inv_sqrt = x.raw();
-  if (*output_shift < 0) {
-    *output_inv_sqrt <<= -*output_shift;
-    *output_shift = 0;
-  }
-  // Convert right shift (right is positive) to left shift.
-  *output_shift *= reverse_shift;
-}
-
-// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
-// rectangular array of numbers.
-//
-// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
-// However, as Dims<N> is to be deprecated, this class exists as an adaptor
-// to enable simple unoptimized implementations of element-wise broadcasting
-// operations.
-template <int N>
-struct NdArrayDesc {
-  // The "extent" of each dimension. Indices along dimension d must be in the
-  // half-open interval [0, extents[d]).
-  int extents[N];
-
-  // The number of *elements* (not bytes) between consecutive indices of each
-  // dimension.
-  int strides[N];
-};
-
-// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
-inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
-                            int i3) {
-  TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
-  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
-         i3 * desc.strides[3];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] +
-         indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7];
-}
-
-// Given the dimensions of the operands for an element-wise binary broadcast,
-// adjusts them so that they can be directly iterated over with simple loops.
-// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
-// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
-//
-// This function assumes that the two input shapes are compatible up to
-// broadcasting and the shorter one has already been prepended with 1s to be the
-// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
-// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
-// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
-// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
-//
-// When two shapes are compatible up to broadcasting, for each dimension d,
-// the input extents are either equal, or one of them is 1.
-//
-// This function performs the following for each dimension d:
-// - If the extents are equal, then do nothing since the loop that walks over
-//   both of the input arrays is correct.
-// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
-//   and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
-//   array0 to be referenced *at any index* in dimension d and still access the
-//   same slice.
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
-                                                const Dims<N>& input1_dims,
-                                                NdArrayDesc<N>* desc0_out,
-                                                NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  // Copy dims to desc.
-  for (int i = 0; i < N; ++i) {
-    desc0_out->extents[i] = input0_dims.sizes[i];
-    desc0_out->strides[i] = input0_dims.strides[i];
-    desc1_out->extents[i] = input1_dims.sizes[i];
-    desc1_out->strides[i] = input1_dims.strides[i];
-  }
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = ArraySize(input0_dims, i);
-    const int extent1 = ArraySize(input1_dims, i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-// Copies dims to desc, calculating strides.
-template <int N>
-inline void CopyDimsToDesc(const RuntimeShape& input_shape,
-                           NdArrayDesc<N>* desc_out) {
-  int desc_stride = 1;
-  for (int i = N - 1; i >= 0; --i) {
-    desc_out->extents[i] = input_shape.Dims(i);
-    desc_out->strides[i] = desc_stride;
-    desc_stride *= input_shape.Dims(i);
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
-    NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-  TFLITE_DCHECK(desc2_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-  auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-  CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    const int extent2 = extended_input2_shape.Dims(i);
-
-    int extent = extent0;
-    if (extent1 != 1) extent = extent1;
-    if (extent2 != 1) extent = extent2;
-
-    TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
-    TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
-    TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
-
-    if (!(extent0 == extent1 && extent1 == extent2)) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent;
-      }
-      if (extent1 == 1) {
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent;
-      }
-      if (extent2 == 1) {
-        desc2_out->strides[i] = 0;
-        desc2_out->extents[i] = extent;
-      }
-    }
-  }
-}
-
-// Detailed implementation of NDOpsHelper; the indexes must start as a zero
-// array. This implementation is equivalent to N nested loops. E.g., if N=4, it
-// can be re-written as:
-// for (int b = 0; b < output.extents[0]; ++b) {
-//   for (int y = 0; y < output.extents[1]; ++y) {
-//     for (int x = 0; x < output.extents[2]; ++x) {
-//       for (int c = 0; c < output.extents[3]; ++c) {
-//           calc({b,y,x,c});
-//       }
-//     }
-//   }
-// }
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
-  }
-}
-
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    calc(indexes);
-  }
-}
-
-// Execute the calc function in the innermost iteration based on the shape of
-// the output. The calc function should take a single argument of type int[N].
-template <int N, typename Calc>
-inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
-  int indexes[N] = {0};
-  NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
-}
-// Copied from gemmlowp::RoundDown when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded down to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundDown(Integer i) {
-  return i - (i % Modulus);
-}
-
-// Copied from gemmlowp::RoundUp when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded up to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundUp(Integer i) {
-  return RoundDown<Modulus>(i + Modulus - 1);
-}
-
-// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
-template <typename Integer>
-Integer CeilQuotient(Integer a, Integer b) {
-  return (a + b - 1) / b;
-}
-
-// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
-// the direct dependency of internal/optimized/ on gemmlowp.
-//
-// It computes a reasonable number of threads to use for a GEMM of shape
-// (rows, cols, depth).
-//
-// TODO(b/131910176): get rid of this function by switching each call site
-// to its own more sensible logic for its own workload.
-template <int KernelRows>
-inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
-                                int depth) {
-  // Early-exit in the default case where multi-threading is disabled.
-  if (max_num_threads == 1) {
-    return 1;
-  }
-
-  // Ensure that each thread has KernelRows rows to process, if at all possible.
-  int thread_count = std::min(max_num_threads, rows / KernelRows);
-
-  // Limit the number of threads according to the overall size of the problem.
-  if (thread_count > 1) {
-    // Empirically determined value.
-    static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
-
-    // We can only multiply two out of three sizes without risking overflow
-    const std::uint64_t cubic_size =
-        std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
-
-    thread_count = std::min(
-        thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
-  }
-
-  if (thread_count < 1) {
-    thread_count = 1;
-  }
-
-  assert(thread_count > 0 && thread_count <= max_num_threads);
-  return thread_count;
-}
-
-template <typename T>
-void optimized_ops_preload_l1_stream(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_preload_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
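For reference on the descriptor logic removed above: broadcasting is implemented by giving a size-1 dimension the other operand's extent and a stride of 0, so the same element is re-read while the output index advances. A minimal standalone sketch of that idea (illustrative names only, not the tflite API):

```cpp
// Standalone sketch of stride-0 broadcasting, mirroring the deleted
// NdArrayDescsForElementwiseBroadcast idea; all names here are illustrative.
#include <cstdio>

int main() {
  // a is 2x3, b is logically 1x3 and is broadcast over a's first dimension.
  const float a[2][3] = {{1, 2, 3}, {4, 5, 6}};
  const float b[3] = {10, 20, 30};

  const int extents[2] = {2, 3};    // output extents
  const int a_strides[2] = {3, 1};  // normal row-major strides
  const int b_strides[2] = {0, 1};  // stride 0 on dim 0 => reuse b's only row

  float out[2][3];
  const float* pa = &a[0][0];
  for (int i = 0; i < extents[0]; ++i) {
    for (int j = 0; j < extents[1]; ++j) {
      out[i][j] = pa[i * a_strides[0] + j * a_strides[1]] +
                  b[i * b_strides[0] + j * b_strides[1]];
    }
  }
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) printf("%g ", out[i][j]);  // 11 22 33 / 14 25 36
    printf("\n");
  }
  return 0;
}
```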

+ 0 - 122
code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h

@@ -1,122 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
-
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/op_macros.h"
-
-#ifndef TFLITE_DCHECK
-#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_EQ
-#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_NE
-#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_GE
-#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_GT
-#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_LE
-#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_LT
-#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
-#ifndef TFLITE_CHECK
-#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_EQ
-#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_NE
-#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_GE
-#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_GT
-#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_LE
-#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_LT
-#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TF_LITE_STATIC_MEMORY
-// TODO(b/162019032): Consider removing these type-aliases.
-using int8 = std::int8_t;
-using uint8 = std::uint8_t;
-using int16 = std::int16_t;
-using uint16 = std::uint16_t;
-using int32 = std::int32_t;
-using uint32 = std::uint32_t;
-#endif  // !defined(TF_LITE_STATIC_MEMORY)
-
-// Allow for cross-compiler usage of function signatures - currently used for
-// specifying named RUY profiler regions in templated methods.
-#if defined(_MSC_VER)
-#define TFLITE_PRETTY_FUNCTION __FUNCSIG__
-#elif defined(__GNUC__)
-#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__
-#else
-#define TFLITE_PRETTY_FUNCTION __func__
-#endif
-
-// TFLITE_DEPRECATED()
-//
-// Duplicated from absl/base/macros.h to avoid pulling in that library.
-// Marks deprecated class, struct, enum, function, method, and variable
-// declarations. The macro argument is used as a custom diagnostic message (e.g.
-// suggestion of a better alternative).
-//
-// Example:
-//
-//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
-//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
-//
-// Every usage of a deprecated entity will trigger a warning when compiled with
-// clang's `-Wdeprecated-declarations` option. This option is turned off by
-// default, but the warnings will be reported by clang-tidy.
-#if defined(__clang__) && __cplusplus >= 201103L
-#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
-#endif
-
-#ifndef TFLITE_DEPRECATED
-#define TFLITE_DEPRECATED(message)
-#endif
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
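The check macros removed above all follow the same ternary pattern: on success the whole expression is `(void)0`, otherwise it falls through to an abort/assert expression, which keeps the macro usable as a single statement. A standalone sketch of that construction with a local abort helper (hypothetical names, not the tflite macros):

```cpp
// Illustrative re-creation of the "(cond) ? (void)0 : ABORT" macro pattern;
// MY_ABORT and MY_CHECK_EQ are hypothetical stand-ins, not tflite names.
#include <cstdio>
#include <cstdlib>

#define MY_ABORT \
  (fprintf(stderr, "check failed at %s:%d\n", __FILE__, __LINE__), abort())
#define MY_CHECK_EQ(x, y) (((x) == (y)) ? (void)0 : MY_ABORT)

int main() {
  int rows = 4;
  MY_CHECK_EQ(rows, 4);  // passes: the whole expression evaluates to (void)0
  printf("check passed\n");
  // MY_CHECK_EQ(rows, 5);  // would print the diagnostic and abort()
  return 0;
}
```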

+ 0 - 40
code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h

@@ -1,40 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
-    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
-#define TF_LITE_GLOBAL_STD_PREFIX
-#else
-#define TF_LITE_GLOBAL_STD_PREFIX std
-#endif
-
-#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
-  template <class T>                                  \
-  inline T tf_name(const T x) {                       \
-    return TF_LITE_GLOBAL_STD_PREFIX::std_name(x);    \
-  }
-
-DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
-DECLARE_STD_GLOBAL_SWITCH1(TfLiteExpm1, expm1);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
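The DECLARE_STD_GLOBAL_SWITCH1 macro above stamps out a one-argument wrapper whose body calls either the global or the std:: version of a cmath function, depending on the toolchain. A hand-written sketch of what the TfLiteRound expansion amounts to when the std:: prefix is selected (illustrative, not the generated code):

```cpp
// Hand-written equivalent of DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round)
// when TF_LITE_GLOBAL_STD_PREFIX is "std"; MyRound is an illustrative name.
#include <cmath>
#include <cstdio>

template <class T>
inline T MyRound(const T x) {
  return std::round(x);  // with the global prefix this would be ::round(x)
}

int main() {
  printf("%g %g\n", MyRound(2.5), MyRound(-2.5));  // 3 -3, halves round away from zero
  return 0;
}
```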

+ 0 - 35
code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h

@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
-inline float TfLiteMax(const float& x, const float& y) {
-  return std::max(x, y);
-}
-#else
-template <class T>
-inline T TfLiteMax(const T& x, const T& y) {
-  return std::fmax(x, y);
-}
-#endif
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

+ 0 - 35
code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h

@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
-inline float TfLiteMin(const float& x, const float& y) {
-  return std::min(x, y);
-}
-#else
-template <class T>
-inline T TfLiteMin(const T& x, const T& y) {
-  return std::fmin(x, y);
-}
-#endif
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_

+ 0 - 20
code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h

@@ -1,20 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
-
-// TFLM does not need to utilize any Neon optimizations.
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

+ 0 - 122
code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h

@@ -1,122 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
-
-#include <vector>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
-  return RuntimeShape(data.size(), data.data());
-}
-
-// A list of tensors in a format that can be used by kernels like split and
-// concatenation.
-template <typename T>
-class VectorOfTensors {
- public:
-  // Build with the tensors in 'tensor_list'.
-  VectorOfTensors(const TfLiteContext& context,
-                  const TfLiteIntArray& tensor_list) {
-    int num_tensors = tensor_list.size;
-
-    all_data_.reserve(num_tensors);
-    all_shape_.reserve(num_tensors);
-    all_shape_ptr_.reserve(num_tensors);
-
-    for (int i = 0; i < num_tensors; ++i) {
-      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
-      all_data_.push_back(GetTensorData<T>(t));
-      all_shape_.push_back(GetTensorShape(t));
-    }
-
-    // Taking the pointer from inside a std::vector is only OK if the vector is
-    // never modified, so we populate all_shape in the previous loop and then we
-    // are free to grab iterators here.
-    for (int i = 0; i < num_tensors; ++i) {
-      all_shape_ptr_.push_back(&all_shape_[i]);
-    }
-  }
-  // Return a pointer to the data pointers of all tensors in the list. For
-  // example:
-  //   float* const* f = v.data();
-  //   f[0][1] is the second element of the first tensor.
-  T* const* data() const { return all_data_.data(); }
-
-  // Return a pointer to the shape pointers of all tensors in the list. For
-  // example:
-  //   const RuntimeShape* const* d = v.shapes();
-  //   d[1] holds the dimensions of the second tensor in the list.
-  const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
-
- private:
-  std::vector<T*> all_data_;
-  std::vector<RuntimeShape> all_shape_;
-  std::vector<RuntimeShape*> all_shape_ptr_;
-};
-
-// A list of quantized tensors in a format that can be used by kernels like
-// split and concatenation.
-class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
- public:
-  // Build with the tensors in 'tensor_list'.
-  VectorOfQuantizedTensors(const TfLiteContext& context,
-                           const TfLiteIntArray& tensor_list)
-      : VectorOfTensors<uint8_t>(context, tensor_list) {
-    for (int i = 0; i < tensor_list.size; ++i) {
-      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
-      zero_point_.push_back(t->params.zero_point);
-      scale_.push_back(t->params.scale);
-    }
-  }
-
-  const float* scale() const { return scale_.data(); }
-  const int32_t* zero_point() const { return zero_point_.data(); }
-
- private:
-  std::vector<int32_t> zero_point_;
-  std::vector<float> scale_;
-};
-
-// Writes randomly accessed values from `input` sequentially into `output`.
-template <typename T>
-class SequentialTensorWriter {
- public:
-  SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
-    input_data_ = GetTensorData<T>(input);
-    output_ptr_ = GetTensorData<T>(output);
-  }
-  SequentialTensorWriter(const T* input_data, T* output_data)
-      : input_data_(input_data), output_ptr_(output_data) {}
-
-  void Write(int position) { *output_ptr_++ = input_data_[position]; }
-  void WriteN(int position, int len) {
-    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
-    output_ptr_ += len;
-  }
-
- private:
-  const T* input_data_;
-  T* output_ptr_;
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
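For context on the writer removed above: it gathers values from arbitrary input positions and emits them back-to-back into the output, which is the access pattern kernels such as gather and slice need. A standalone sketch of the same pattern on raw pointers (illustrative class name, no TfLiteTensor involved):

```cpp
// Standalone sketch of the SequentialTensorWriter idea from the deleted
// header: random reads from the input, sequential writes to the output.
#include <cstdio>
#include <cstring>

template <typename T>
class SequentialWriterSketch {
 public:
  SequentialWriterSketch(const T* input, T* output)
      : input_(input), output_(output) {}
  // Copy one element from input[position] to the next output slot.
  void Write(int position) { *output_++ = input_[position]; }
  // Copy a contiguous run of len elements starting at input[position].
  void WriteN(int position, int len) {
    memcpy(output_, &input_[position], sizeof(T) * len);
    output_ += len;
  }

 private:
  const T* input_;
  T* output_;
};

int main() {
  const float input[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  float output[4];
  SequentialWriterSketch<float> writer(input, output);
  writer.Write(5);      // output[0] = 5
  writer.WriteN(1, 3);  // output[1..3] = 1, 2, 3
  for (float v : output) printf("%g ", v);
  printf("\n");
  return 0;
}
```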

+ 0 - 484
code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h

@@ -1,484 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/c/common.h"
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-
-namespace tensor_utils {
-
-// Multiplies a matrix by a scalar and reduces the result on each row to a
-// scalar.
-// Parameters:
-//     - matrix: matrix of size n_row * n_col
-//     - scalar: the scalar that is multiplied to each element in the matrix
-//     - n_row:  the row count of the matrix
-//     - n_col:  the column count of the matrix
-//     - output: the 32bit output
-// Note: we do not need saturation because the int8 * int8 accumulation is safe
-// from overflow up to (2^31-1) / (2^14) = 131072, which is bigger than n_row,
-// and a non-zero initial output value is not exceptionally large.
-void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
-                                    int32_t n_row, int32_t n_col,
-                                    int32_t* output);
-
-// Add another vector for each batch in the batch vector.
-template <typename T>
-void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
-                          T* batch_vector) {
-  for (int b = 0; b < n_batch; b++) {
-    for (int i = 0; i < v_size; ++i) {
-      batch_vector[i] += vector[i];
-    }
-    batch_vector += v_size;
-  }
-}
-
-// Cwise product of two vectors.
-template <typename T>
-inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
-                                     int v_size, T* result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ = *vector1++ * *vector2++;
-  }
-}
-
-// Cwise product of a vector and a batch-vector.
-template <typename T>
-inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
-                                          const T* batch_vector, int n_batch,
-                                          T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
-    // Update the pointers.
-    result += v_size;
-    batch_vector += v_size;
-  }
-}
-
-// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
-// assumption here is that result array is initialized to valid values.
-template <typename T>
-inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
-                                               const T* __restrict__ vector2,
-                                               int v_size,
-                                               T* __restrict__ result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ += *vector1++ * *vector2++;
-  }
-}
-
-// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
-// operation, the assumption here is that result array is initialized to valid
-// values.
-template <typename T>
-inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
-                                                    const T* batch_vector,
-                                                    int n_batch, T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
-    // Update the pointers.
-    result += v_size;
-    batch_vector += v_size;
-  }
-}
-
-// Batch vector initialization with another vector.
-template <typename T>
-void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
-                             T* batch_vector) {
-  for (int b = 0; b < n_batch; b++) {
-    std::copy_n(vector, v_size, batch_vector + b * v_size);
-  }
-}
-
-// Checks if all entries of vector are zero for float.
-bool IsZeroVector(const float* vector, int v_size);
-
-// Checks if all entries of vector are zero for int8.
-bool IsZeroVector(const int8_t* vector, int v_size);
-
-// Quantizes a buffer of floating point values using a symmetric quantization
-// (i.e. linear quantization without an offset) to 8-bit signed integers.
-// It also outputs the range (min, max) of the floating point buffer, and the
-// scaling factor used to quantize the values.
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float* min_value,
-                             float* max_value, float* scaling_factor);
-
-// Quantizes a buffer of floating point values using a symmetric quantization
-// (i.e. linear quantization without an offset) to 8-bit signed integers.
-// It uses the range (min, max) provided to the function to calculate the
-// appropriate scaling factor to quantize the values.
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float min_value,
-                             float max_value, float* scaling_factor);
-
-void AsymmetricQuantizeFloats(const float* values, const int size,
-                              int8_t* quantized_values, float* scaling_factor,
-                              int32_t* offset);
-
-// Helper function to quantize floats.
-// float_data_ptr     input float vectors
-// n_batch            number of input vectors
-// n_data             size of a single input vector
-// quantized_data_ptr (out) vector with quantized data
-// scaling_factors    (out) scaling factors (one per vector)
-// zero_points        (out) zero points (one per vector)
-// do_asymmetric      controls if the quantization should be asymmetric.
-inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
-                                int n_data, int8_t* quantized_data_ptr,
-                                float* scaling_factors, int32_t* zero_points,
-                                bool do_asymmetric) {
-  for (int b = 0; b < n_batch; ++b) {
-    const int offset = b * n_data;
-    if (do_asymmetric) {
-      tensor_utils::AsymmetricQuantizeFloats(
-          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
-          &scaling_factors[b], &zero_points[b]);
-    } else {
-      float unused_min, unused_max;
-      tensor_utils::SymmetricQuantizeFloats(
-          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
-          &unused_min, &unused_max, &scaling_factors[b]);
-    }
-  }
-}
-
-// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
-// dimension composed by input vectors independent from each other). The result
-// of the multiplication is accumulated to the passed result buffer.
-// More specifically, for a matrix M of shape [n, i] and a batched-vector
-// of shape [i, batch] it will first compute the product of shape [n, batch].
-// This product will be accumulated to the result buffer.
-void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
-                                         int m_cols, const float* vector,
-                                         int n_batch, float* result);
-
-// Same as the function above, but the matrix is a sparse tensor with block
-// pattern 1x4.
-// This function assumes that m_cols is a multiple of the block size (4 in this
-// case) so that there's no incomplete block.
-void SparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result);
-
-// Same as the function above, but the matrix is stored in block compressed
-// sparse row format with block pattern 1x16 which consists of two arrays:
-//   1. A matrix array stores non-zero blocks of the matrix in row major.
-//   2. A ledger array stores nrows groups, one group per row. Each group starts
-//      with an integer representing the number of non-zero blocks for the
-//      corresponding row and follows with column indexes of the first element
-//      of each non-zero block.
-// This function assumes that
-//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
-//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result);
-
-// Same as the function above, but for values quantized using symmetric
-// quantization (e.g. by calling SymmetricQuantizeFloats).
-// The passed scaling_factors argument is a buffer of the quantization scaling
-// factors that will be used to dequantize the products into the final result
-// buffer.
-// These scaling factors are the multiplication of the matrix scaling factor
-// by the vector's scaling factor, one per batch (i.e. this allows quantizing
-// each batch in the batch-vector matrix independently).
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors,
-    const float* __restrict__ scaling_factors, int n_batch,
-    float* __restrict__ result);
-
-// Same as the function above except that vector values
-// are quantized with asymmetric quantization per-batch and the matrix
-// is quantized per row.
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors,
-    const float* __restrict__ scaling_factors, int n_batch,
-    float* __restrict__ result, const float* __restrict__ per_channel_scale,
-    const int32_t* __restrict__ input_offset);
-
-// Same as the function above, but the matrix is a sparse tensor with block
-// pattern 1x16.
-// This function assumes that m_cols is a multiple of the block size (16 in this
-// case) so that there's no incomplete block. Also, it assumes all offsets of
-// input, output and filter are zero.
-void SparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result);
-
-// Same as the function above, but the matrix is stored in block compressed
-// sparse row format with block pattern 1x16 which consists of two arrays:
-//   1. A matrix array stores non-zero blocks of the matrix in row major.
-//   2. A ledger array stores nrows groups, one group per row. Each group starts
-//      with an integer representing the number of non-zero blocks for the
-//      corresponding row followed by column index of the first element of
-//      each non-zero block.
-// This function assumes that
-//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
-//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    const int m_rows, const int m_cols, const int8_t* __restrict__ vectors,
-    const float* __restrict__ scaling_factors, int n_batch,
-    float* __restrict__ result);
-
-// Same as the above 8, 8, 8 integer matmul except that it takes a zero point
-// and is non-accumulative.
-// TODO(b/148688698): remove this function by folding zero point calculation in
-// prepare() function.
-void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
-                               const int8_t* input_to_gate_weights,
-                               int32_t input_to_gate_effective_scale_a,
-                               int32_t input_to_gate_effective_scale_b,
-                               int32_t n_batch, int32_t n_input, int32_t n_cell,
-                               int8_t* gate_output, int8_t gate_output_zp);
-
-// Same as above but has 16 bit and 8 bit input and 8 bit output.
-// Used in projection when hidden is 16bit.
-void MatrixBatchVectorMultiply(const int16_t* hidden,
-                               const int8_t* hidden_to_output_weights,
-                               int32_t proj_effective_scale_a,
-                               int32_t proj_effective_scale_b,
-                               const int32_t* gate_bias, int32_t n_batch,
-                               int32_t n_hidden, int32_t n_output,
-                               int32_t output_zp, int8_t* proj_output);
-
-// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
-// vector.
-// Parameters:
-//     - input: batch vector of size n_batch * n_input; 16 bit.
-//     - layer_norm_weights:  the quantized layer normalization weights.
-//     - bias: the bias for the layer normalization.
-//     - layer_norm_scale_a: multiplier for scale factor.
-//     - layer_norm_scale_b: shift for scale factor.
-//     - variance_limit: the guard to make sure the inverse does not overflow.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - output:  the 16 bit output
-void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
-                    const int32_t* bias, int32_t layer_norm_scale_a,
-                    int32_t layer_norm_scale_b, int32_t variance_limit,
-                    int n_batch, int n_input, int16_t* output);
-
-// Same as above but the internal calculation is done in float.
-void ApplyLayerNormFloat(const int16_t* input,
-                         const int16_t* layer_norm_weights,
-                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
-                         const int32_t* bias, int n_batch, int n_input,
-                         int16_t* output);
-
-// Apply Sigmoid to a quantized vector.
-// Parameters:
-//     - input: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - output:  the 16 bit output
-// The input is in Q3.12 format and the output is in Q0.15 format.
-void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
-                  int16_t* output);
-
-// Same as above but the internal calculation is done in float.
-void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                       int16_t* output);
-
-// Apply Tanh to a quantized vector.
-// Parameters:
-//     - integer_bits: the integer bits of the input.
-//                     Currently supports 0, 1, 2, 3, 4, 5, 6.
-//     - input: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - output:  the 16 bit output
-// The input is in Qm.15-m format and the output is in Q0.15 format.
-void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
-               int32_t n_input, int16_t* output);
-
-// Apply Tanh to a quantized vector. The internal calculation is in float.
-//    - Input has 2^(integer_bits) as scale.
-//    - Output has Q0.15 as scale.
-void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                    int32_t integer_bits, int16_t* output);
-
-// Element-wise multiplication of two quantized vectors.
-// Parameters:
-//     - input_1: batch vector of size n_batch * n_input; 16 bit.
-//     - input_2: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - shift:   the shift needed to produce the output.
-//     - output:  the 16 bit output of size n_batch * n_input.
-// Output does not need to be initialized.
-void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int shift, int16_t* output);
-
-// Element-wise multiplication of two quantized vectors.
-// Parameters:
-//     - input_1: batch vector of size n_batch * n_input; 16 bit.
-//     - input_2: batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch: the number of batches.
-//     - n_input: the size for input and output.
-//     - shift:   the shift needed to produce the output.
-//     - output:  the 8 bit output of size n_batch * n_input.
-// Output does not need to be initialized.
-void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int shift, int8_t* output);
-
-// Element-wise multiplication of two quantized vectors with rescaling.
-// Parameters:
-//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
-//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
-//     - multiplier: the multiplier part of scale.
-//     - shift:      the shift part of scale.
-//     - n_batch:    the number of batches.
-//     - n_input:    the size for input and output.
-//     - output:     the 8 bit output of size n_batch * n_input.
-//     - output_zp:  the zero point of output.
-// Output does not need to be initialized.
-// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m *
-// 2^(s - 31).
-void CwiseMul(const int16_t* input_1, const int16_t* input_2,
-              int32_t multiplier, int32_t shift, int32_t n_batch,
-              int32_t n_input, int32_t output_zp, int8_t* output);
-
-// Element-wise saturating addition of two quantized vectors without rescaling.
-// Parameters:
-//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
-//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
-//     - n_batch:    the number of batches.
-//     - n_input:    the size for input and output.
-//     - output:     the 8 bit output of size n_batch * n_input.
-// Output does not need to be initialized.
-void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int16_t* output);
-
-// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
-// int8_t. Parameters:
-//     - vector:         vector of size v_size.
-//     - v_size:         the size of the vector.
-//     - clipping_value: the value used for clipping.
-void CwiseClipping(float* vector, const int v_size, const float clipping_value);
-void CwiseClipping(int16_t* vector, const int v_size,
-                   const int16_t clipping_value);
-void CwiseClipping(int8_t* vector, const int v_size,
-                   const int8_t clipping_value);
-
-// Dot product of two vectors.
-float VectorVectorDotProduct(const float* vector1, const float* vector2,
-                             int v_size);
-
-// Dot product of two batch vectors of size n_batch * v_size:
-// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
-//            x_2_1, x_2_2, ..., x_2_vsize,
-//            ...
-//            x_nbatch_1,..., x_nbatch_vsize]
-// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
-//            y_2_1, y_2_2, ..., y_2_vsize,
-//            ...
-//            y_nbatch_1,..., y_nbatch_vsize]
-// Then result will be a vector of n_batch size starting from 'result':
-// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
-//  x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
-//  ...
-//  x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
-template <typename T>
-inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
-                                             int v_size, int n_batch,
-                                             T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
-    vector1 += v_size;
-    vector2 += v_size;
-  }
-}
-
-// Same as above but input is 16bit and output is 32bit.
-void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                      const int16_t* vector2, int v_size,
-                                      int n_batch, int32_t* result);
-
-// Same as above, but inputs are 16bit integer and output is 16bit integer.
-void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
-                                             const int16_t* batch_vector,
-                                             int n_batch, int32_t multiplier,
-                                             int shift, int16_t* result);
-
-// Compute "1.0f - elements of vector" (used in CIFG).
-void Sub1Vector(const float* vector, int v_size, float* result);
-
-// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
-// "vector" has range [0, 32767] because it is the output of sigmoid function.
-void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
-
-// Multiply all elements of vector with a scalar.
-void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                          float* result);
-
-// Reduce-sum on a float input vector:
-// input_vector: float pointer to input vector.
-// output_vector: float pointer to the output vector.
-// output_size: output vector size.
-// reduction_size: number of consecutive elements from input vector which are
-// added to get one element of output.
-void ReductionSumVector(const float* input_vector, float* output_vector,
-                        int output_size, int reduction_size);
-
-// Same as above but input/output is 32 bit integer.
-void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size);
-
-// Same as above but input is 8 bit integer.
-void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size);
-
-// Layer norm for each batch.
-void MeanStddevNormalization(const float* input_vector, float* output_vector,
-                             int v_size, int n_batch);
-
-// Saturate Add with rescale on both inputs.
-void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                          const int8_t* recurrent, int8_t recurrent_zp,
-                          int32_t input_effective_scale_a,
-                          int32_t input_effective_scale_b,
-                          int32_t recurrent_effective_scale_a,
-                          int32_t recurrent_effective_scale_b, int32_t n_batch,
-                          int32_t n_cell, int16_t* output);
-
-}  // namespace tensor_utils
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
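The symmetric quantization declared above (SymmetricQuantizeFloats) maps floats onto int8 with no offset by deriving the scale from the largest absolute value in the buffer. A rough standalone sketch of that scheme, assuming the usual max-abs/127 scale (illustrative only, not the tflite implementation):

```cpp
// Standalone sketch of symmetric (offset-free) int8 quantization: the scale is
// chosen so the largest |value| maps to 127. Function name is illustrative.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

void SymmetricQuantizeSketch(const float* values, int size,
                             int8_t* quantized, float* scaling_factor) {
  float max_abs = 0.f;
  for (int i = 0; i < size; ++i) max_abs = std::max(max_abs, std::fabs(values[i]));
  *scaling_factor = (max_abs > 0.f) ? max_abs / 127.f : 1.f;
  for (int i = 0; i < size; ++i) {
    const float scaled = std::round(values[i] / *scaling_factor);
    quantized[i] = static_cast<int8_t>(std::min(127.f, std::max(-127.f, scaled)));
  }
}

int main() {
  const float v[4] = {-0.5f, 0.25f, 1.0f, -1.0f};
  int8_t q[4];
  float scale;
  SymmetricQuantizeSketch(v, 4, q, &scale);
  printf("scale=%g quantized=%d %d %d %d\n", scale, q[0], q[1], q[2], q[3]);
  return 0;
}
```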

+ 0 - 416
code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc

@@ -1,416 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-
-#include <algorithm>
-#include <cmath>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-
-namespace tflite {
-
-namespace {
-// These constants are used to manipulate the binary representation of doubles.
-// Double-precision binary64 floating point format is:
-// Bit |  63  |  62-52   |   51-0   |
-//     | Sign | Exponent | Fraction |
-// To avoid 64-bit integers as much as possible, I break this into high and
-// low 32-bit chunks. High is:
-// Bit |  31  |  30-20   |      19-0     |
-//     | Sign | Exponent | High Fraction |
-// Low is:
-// Bit |     31-0     |
-//     | Low Fraction |
-// We then access the components through logical bit-wise operations to
-// extract the parts needed, with the positions and masks derived from the
-// layout shown above.
-constexpr uint64_t kSignMask = 0x8000000000000000LL;
-constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
-constexpr int32_t kExponentShift = 52;
-constexpr int32_t kExponentBias = 1023;
-constexpr uint32_t kExponentIsBadNum = 0x7ff;
-constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
-constexpr uint32_t kFractionShift = 22;
-constexpr uint32_t kFractionRoundingMask = 0x003fffff;
-constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
-}  // namespace
-
-void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
-                        int* shift) {
-#if TFLITE_SINGLE_ROUNDING
-  // Single-rounding MultiplyByQuantizedMultiplier only supports positive
-  // multipliers.
-  // TFLITE_DCHECK(double_multiplier >= 0);
-#endif
-  if (double_multiplier == 0.) {
-    *quantized_multiplier = 0;
-    *shift = 0;
-    return;
-  }
-#ifdef TFLITE_EMULATE_FLOAT
-  // If we're trying to avoid the use of floating-point instructions (for
-  // example on microcontrollers) then use an alternative implementation
-  // that only requires integer and bitwise operations. To enable this, you
-  // need to set the define during the build process for your platform.
-  int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
-#else   // TFLITE_EMULATE_FLOAT
-  const double q = std::frexp(double_multiplier, shift);
-  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1LL << 31)));
-#endif  // TFLITE_EMULATE_FLOAT
-  TFLITE_CHECK(q_fixed <= (1LL << 31));
-  if (q_fixed == (1LL << 31)) {
-    q_fixed /= 2;
-    ++*shift;
-  }
-  TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
-  // A shift amount smaller than -31 would cause all bits to be shifted out
-  // and thus all results would be zero. We implement that instead with
-  // q_fixed==0, so as to avoid hitting issues with right-shift
-  // operations with shift amounts greater than 31. Note that this happens
-  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
-  // that we're effectively flushing tiny double_multiplier's to zero.
-  // We could conceivably handle values in the range (roughly) [32, 63]
-  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
-  // the present handling is just doing 'flush denormals to zero'. We could
-  // reconsider and actually generate nonzero denormals if a need arises.
-  if (*shift < -31) {
-    *shift = 0;
-    q_fixed = 0;
-  }
-#if TFLITE_SINGLE_ROUNDING
-  // Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30,
-  // saturate it.
-  if (*shift > 30) {
-    *shift = 30;
-    q_fixed = (1LL << 31) - 1;
-  }
-#endif
-  *quantized_multiplier = static_cast<int32_t>(q_fixed);
-}
-
-void QuantizeMultiplierGreaterThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* left_shift) {
-  TFLITE_CHECK_GT(double_multiplier, 1.);
-  QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
-  TFLITE_CHECK_GE(*left_shift, 0);
-}
-
-void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
-                                         int32_t* quantized_multiplier,
-                                         int* left_shift) {
-  TFLITE_CHECK_LT(double_multiplier, 1.);
-  TFLITE_CHECK_GT(double_multiplier, 0.);
-  int shift;
-  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
-  TFLITE_CHECK_LE(shift, 0);
-  *left_shift = shift;
-}
-
-int64_t IntegerFrExp(double input, int* shift) {
-  // Make sure our assumptions about the double layout hold.
-  TFLITE_CHECK_EQ(8, sizeof(double));
-
-  // We want to access the bits of the input double value directly, which is
-  // tricky to do safely, so use a union to handle the casting.
-  union {
-    double double_value;
-    uint64_t double_as_uint;
-  } cast_union;
-  cast_union.double_value = input;
-  const uint64_t u = cast_union.double_as_uint;
-
-  // If the bitfield is all zeros apart from the sign bit, this is a normalized
-  // zero value, so return standard values for this special case.
-  if ((u & ~kSignMask) == 0) {
-    *shift = 0;
-    return 0;
-  }
-
-  // Deal with NaNs and Infs, which are always indicated with a fixed pattern in
-  // the exponent, and distinguished by whether the fractions are zero or
-  // non-zero.
-  const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
-  if (exponent_part == kExponentIsBadNum) {
-    *shift = std::numeric_limits<int>::max();
-    if (u & kFractionMask) {
-      // NaN, so just return zero (with the exponent set to INT_MAX).
-      return 0;
-    } else {
-      // Infinity, so return +/- INT_MAX.
-      if (u & kSignMask) {
-        return std::numeric_limits<int64_t>::min();
-      } else {
-        return std::numeric_limits<int64_t>::max();
-      }
-    }
-  }
-
-  // The shift is fairly easy to extract from the high bits of the double value,
-  // just by masking it out and applying a bias. The std::frexp() implementation
-  // always returns values between 0.5 and 1.0 though, whereas the exponent
-  // assumes 1.0 to 2.0 is the standard range, so I add on one to match that
-  // interface.
-  *shift = (exponent_part - kExponentBias) + 1;
-
-  // There's an implicit high bit in the double format definition, so make sure
-  // we include that at the top, and then reconstruct the rest of the fractional
-  // value from the remaining fragments.
-  int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);
-
-  // We're cutting off some bits at the bottom, so to exactly match the standard
-  // frexp implementation here we'll apply rounding by adding one to the least
-  // significant bit of the result if the discarded portion is over half of the
-  // maximum.
-  if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
-    fraction += 1;
-  }
-  // Negate the fraction if the sign bit was set.
-  if (u & kSignMask) {
-    fraction *= -1;
-  }
-
-  return fraction;
-}
-
-double DoubleFromFractionAndShift(int64_t fraction, int shift) {
-  union {
-    double double_value;
-    uint64_t double_as_uint;
-  } result;
-
-  // Detect NaNs and infinities.
-  if (shift == std::numeric_limits<int>::max()) {
-    if (fraction == 0) {
-      return std::numeric_limits<double>::quiet_NaN();
-    } else if (fraction > 0) {
-      return std::numeric_limits<double>::infinity();
-    } else {
-      return -std::numeric_limits<double>::infinity();
-    }
-  }
-
-  // Return a normalized zero for a zero fraction.
-  if (fraction == 0) {
-    result.double_as_uint = 0;
-    return result.double_value;
-  }
-
-  bool is_negative = (fraction < 0);
-  int64_t encoded_fraction = is_negative ? -fraction : fraction;
-  int64_t encoded_shift = (shift - 1);
-  while (encoded_fraction < 0x40000000) {
-    encoded_fraction *= 2;
-    encoded_shift -= 1;
-  }
-  while (encoded_fraction > 0x80000000) {
-    encoded_fraction /= 2;
-    encoded_shift += 1;
-  }
-  encoded_fraction -= 0x40000000;
-  if (encoded_shift < -1022) {
-    encoded_shift = -1023;
-  } else if (encoded_shift > 1022) {
-    encoded_shift = 1023;
-  }
-  encoded_shift += kExponentBias;
-  uint64_t encoded_sign = is_negative ? kSignMask : 0;
-  result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
-                          (encoded_fraction << kFractionShift);
-  return result.double_value;
-}
-
-double IntegerDoubleMultiply(double a, double b) {
-  int a_shift;
-  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
-  int b_shift;
-  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
-  // Detect NaNs and infinities.
-  if (a_shift == std::numeric_limits<int>::max() ||
-      (b_shift == std::numeric_limits<int>::max())) {
-    return std::numeric_limits<double>::quiet_NaN();
-  }
-  const int result_shift = a_shift + b_shift + 1;
-  const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
-  return DoubleFromFractionAndShift(result_fraction, result_shift);
-}
-
-int IntegerDoubleCompare(double a, double b) {
-  int a_shift;
-  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
-  int b_shift;
-  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
-
-  // Detect NaNs and infinities.
-  if (a_shift == std::numeric_limits<int>::max() ||
-      (b_shift == std::numeric_limits<int>::max())) {
-    return 1;
-  }
-
-  if ((a_fraction == 0) && (b_fraction < 0)) {
-    return 1;
-  } else if ((a_fraction < 0) && (b_fraction == 0)) {
-    return -1;
-  } else if (a_shift < b_shift) {
-    return -1;
-  } else if (a_shift > b_shift) {
-    return 1;
-  } else if (a_fraction < b_fraction) {
-    return -1;
-  } else if (a_fraction > b_fraction) {
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
-void PreprocessSoftmaxScaling(double beta, double input_scale,
-                              int input_integer_bits,
-                              int32_t* quantized_multiplier, int* left_shift) {
-  // If the overall multiplier (input and beta) is large, then exp() of an
-  // input difference of 1 scaled by this will be large.  In other words, we
-  // can cap the multiplier and know that, when it is used, the output will be
-  // (round to) zero wherever the input is not at the maximum value.
-
-  // If the overall scale is less than one, and input_integer_bits=0, then the
-  // result is double equivalent of Q0.31 (actually with more precision). Thus
-  // this generates a Q(input_integer_bits).(31-input_integer_bits)
-  // representation.
-#if TFLITE_SINGLE_ROUNDING
-  const double max_real_multiplier = (1LL << 30) - 1.0;
-#else
-  const double max_real_multiplier = (1LL << 31) - 1.0;
-#endif
-
-#ifdef TFLITE_EMULATE_FLOAT
-  const double input_beta = IntegerDoubleMultiply(beta, input_scale);
-  int shift;
-  int64_t fraction = IntegerFrExp(input_beta, &shift);
-  shift += (31 - input_integer_bits);
-  double input_beta_real_multiplier =
-      DoubleFromFractionAndShift(fraction, shift);
-  if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) >
-      0) {
-    input_beta_real_multiplier = max_real_multiplier;
-  }
-#else   // TFLITE_EMULATE_FLOAT
-  const double input_beta_real_multiplier =
-      std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)),
-                       max_real_multiplier);
-#endif  // TFLITE_EMULATE_FLOAT
-
-  QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
-                                   quantized_multiplier, left_shift);
-}
-
-void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
-                                    int input_integer_bits,
-                                    int32_t* quantized_multiplier,
-                                    int* left_shift,
-                                    int32_t* reverse_scaling_divisor,
-                                    int* reverse_scaling_left_shift) {
-  PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
-                           quantized_multiplier, left_shift);
-
-  // Also calculate what amounts to the inverse scaling factor for the input.
-  const double real_reverse_scaling_divisor =
-      (1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
-  tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
-                                              reverse_scaling_divisor,
-                                              reverse_scaling_left_shift);
-}
-
-int CalculateInputRadius(int input_integer_bits, int input_left_shift,
-                         int total_signed_bits) {
-#ifdef TFLITE_EMULATE_FLOAT
-  int64_t result = (1 << input_integer_bits) - 1;
-  result <<= (total_signed_bits - input_integer_bits);
-  result >>= input_left_shift;
-  return result;
-#else   // TFLITE_EMULATE_FLOAT
-  const double max_input_rescaled =
-      1.0 * ((1 << input_integer_bits) - 1) *
-      (1LL << (total_signed_bits - input_integer_bits)) /
-      (1LL << input_left_shift);
-  // Tighten bound using floor.  Suppose that we could use the exact value.
-  // After scaling the difference, the result would be at the maximum.  Thus we
-  // must ensure that our value has lower magnitude.
-  return static_cast<int>(std::floor(max_input_rescaled));
-#endif  // TFLITE_EMULATE_FLOAT
-}
-
-void NudgeQuantizationRange(const float min, const float max,
-                            const int quant_min, const int quant_max,
-                            float* nudged_min, float* nudged_max,
-                            float* nudged_scale) {
-  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
-  const float quant_min_float = static_cast<float>(quant_min);
-  const float quant_max_float = static_cast<float>(quant_max);
-  *nudged_scale = (max - min) / (quant_max_float - quant_min_float);
-  const float zero_point_from_min = quant_min_float - min / *nudged_scale;
-  uint16_t nudged_zero_point;
-  if (zero_point_from_min < quant_min_float) {
-    nudged_zero_point = static_cast<uint16_t>(quant_min);
-  } else if (zero_point_from_min > quant_max_float) {
-    nudged_zero_point = static_cast<uint16_t>(quant_max);
-  } else {
-    nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
-  }
-  *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
-  *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
-}
-
-void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
-                       const float nudged_max, const float* input_data,
-                       float* output_data, const float size) {
-  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
-  const float inv_nudged_scale = 1.0f / nudged_scale;
-
-  for (int i = 0; i < size; i++) {
-    const float src_val = input_data[i];
-    const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
-    const float clamped_shifted = clamped - nudged_min;
-    const float dst_val =
-        TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
-        nudged_min;
-    output_data[i] = dst_val;
-  }
-}
-
-bool CheckedLog2(const float x, int* log2_result) {
-  // Using TfLiteRound instead of std::round and std::log instead of
-  // std::log2 to work around these functions being missing in a toolchain
-  // used in some TensorFlow tests as of May 2018.
-  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
-  const float x_log2_rounded = TfLiteRound(x_log2);
-  const float x_log2_fracpart = x_log2 - x_log2_rounded;
-
-  *log2_result = static_cast<int>(x_log2_rounded);
-  return std::abs(x_log2_fracpart) < 1e-3f;
-}
-
-void QuantizeMultiplierArray(const double* effective_scales, size_t size,
-                             int32_t* effective_scale_significand,
-                             int* effective_shift) {
-  for (size_t i = 0; i < size; ++i) {
-    QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
-                       &effective_shift[i]);
-  }
-}
-
-}  // namespace tflite
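
For reference, a minimal sketch of how the two fake-quantization helpers above fit together. This is illustrative only (it is not part of the commit) and assumes the removed quantization_util.h is still reachable on an include path.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  // Nudge a [-1, 1] float range onto the 8-bit grid [0, 255].
  float nudged_min, nudged_max, nudged_scale;
  tflite::NudgeQuantizationRange(-1.0f, 1.0f, 0, 255,
                                 &nudged_min, &nudged_max, &nudged_scale);
  // Round-trip a few values through the nudged grid; 2.0f falls outside the
  // range and is clamped to nudged_max before quantization.
  const float input[4] = {-0.5f, 0.0f, 0.25f, 2.0f};
  float output[4];
  tflite::FakeQuantizeArray(nudged_scale, nudged_min, nudged_max,
                            input, output, 4);
  for (float v : output) std::printf("%f\n", v);
}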

+ 0 - 292
code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h

@@ -1,292 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
-
-#include <cmath>
-#include <cstdint>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// Given the min and max values of a float array, return
-// reasonable quantization parameters to use for this array.
-template <typename T>
-QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
-                                            bool narrow_range) {
-  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
-  const T qmax = std::numeric_limits<T>::max();
-  const double qmin_double = qmin;
-  const double qmax_double = qmax;
-  // 0 should always be a representable value. Let's assume that the initial
-  // min,max range contains 0.
-  TFLITE_CHECK_LE(rmin, 0.);
-  TFLITE_CHECK_GE(rmax, 0.);
-  if (rmin == rmax) {
-    // Special case where the min,max range is a point. Should be {0}.
-    TFLITE_CHECK_EQ(rmin, 0.);
-    TFLITE_CHECK_EQ(rmax, 0.);
-    QuantizationParams quantization_params;
-    quantization_params.zero_point = 0;
-    quantization_params.scale = 0.;
-    return quantization_params;
-  }
-
-  // General case.
-  //
-  // First determine the scale.
-  const double scale = (rmax - rmin) / (qmax_double - qmin_double);
-
-  // Zero-point computation.
-  // First the initial floating-point computation. The zero-point can be
-  // determined from solving an affine equation for any known pair
-  // (real value, corresponding quantized value).
-  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
-  // The arithmetic error on the zero point computed from either pair
-  // will be roughly machine_epsilon * (sum of absolute values of terms)
-  // so we want to use the variant that adds the smaller terms.
-  const double zero_point_from_min = qmin_double - rmin / scale;
-  const double zero_point_from_max = qmax_double - rmax / scale;
-  const double zero_point_from_min_error =
-      std::abs(qmin_double) + std::abs(rmin / scale);
-  const double zero_point_from_max_error =
-      std::abs(qmax_double) + std::abs(rmax / scale);
-
-  const double zero_point_double =
-      zero_point_from_min_error < zero_point_from_max_error
-          ? zero_point_from_min
-          : zero_point_from_max;
-
-  // Now we need to nudge the zero point to be an integer
-  // (our zero points are integer, and this is motivated by the requirement
-  // to be able to represent the real value "0" exactly as a quantized value,
-  // which is required in multiple places, for example in Im2col with SAME
-  // padding).
-  T nudged_zero_point = 0;
-  if (zero_point_double < qmin_double) {
-    nudged_zero_point = qmin;
-  } else if (zero_point_double > qmax_double) {
-    nudged_zero_point = qmax;
-  } else {
-    nudged_zero_point = static_cast<T>(round(zero_point_double));
-  }
-  // The zero point should always be in the range of quantized value,
-  // [qmin, qmax].
-  TFLITE_CHECK_GE(nudged_zero_point, qmin);
-  TFLITE_CHECK_LE(nudged_zero_point, qmax);
-
-  // Finally, store the result nudged quantization params.
-  QuantizationParams quantization_params;
-  quantization_params.zero_point = nudged_zero_point;
-  quantization_params.scale = scale;
-  return quantization_params;
-}
-
-template <typename T>
-QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
-  return ChooseQuantizationParams<T>(rmin, rmax, false);
-}
-
-// Converts a floating-point number to an integer. For all inputs x where
-// static_cast<IntOut>(x) is legal according to the C++ standard, the result
-// is identical to that cast (i.e. the result is x with its fractional part
-// truncated whenever that is representable as IntOut).
-//
-// static_cast would cause undefined behavior for the following cases, which
-// have well-defined behavior for this function:
-//
-//  1. If x is NaN, the result is zero.
-//
-//  2. If the truncated form of x is above the representable range of IntOut,
-//     the result is std::numeric_limits<IntOut>::max().
-//
-//  3. If the truncated form of x is below the representable range of IntOut,
-//     the result is std::numeric_limits<IntOut>::min().
-//
-// Note that cases #2 and #3 cover infinities as well as finite numbers.
-//
-// The range of FloatIn must include the range of IntOut, otherwise
-// the results are undefined.
-// TODO(sfeuz): Replace by absl::SafeCast once available.
-template <class IntOut, class FloatIn>
-IntOut SafeCast(FloatIn x) {
-  static_assert(!std::numeric_limits<FloatIn>::is_integer,
-                "FloatIn is integer");
-  static_assert(std::numeric_limits<IntOut>::is_integer,
-                "IntOut is not integer");
-  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
-
-  // Special case NaN, for which the logic below doesn't work.
-  if (std::isnan(x)) {
-    return 0;
-  }
-
-  // Negative values all clip to zero for unsigned results.
-  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
-    return 0;
-  }
-
-  // Handle infinities.
-  if (std::isinf(x)) {
-    return x < 0 ? std::numeric_limits<IntOut>::min()
-                 : std::numeric_limits<IntOut>::max();
-  }
-
-  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
-  // unless x is zero in which case exp == 0. Note that this implies that the
-  // magnitude of x is strictly less than 2^exp.
-  int exp = 0;
-  std::frexp(x, &exp);
-
-  // Let N be the number of non-sign bits in the representation of IntOut. If
-  // the magnitude of x is strictly less than 2^N, the truncated version of x
-  // is representable as IntOut. The only representable integer for which this
-  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
-  // by the fall-through below.
-  if (exp <= std::numeric_limits<IntOut>::digits) {
-    return x;
-  }
-
-  // Handle numbers with magnitude >= 2^N.
-  return x < 0 ? std::numeric_limits<IntOut>::min()
-               : std::numeric_limits<IntOut>::max();
-}
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of NEGATIVE its exponent ---
-// this is intended as a RIGHT-shift.
-//
-// Restricted to the case where the multiplier < 1 (and non-negative).
-void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
-                                         int32_t* quantized_multiplier,
-                                         int* left_shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Restricted to the case where the multiplier > 1.
-void QuantizeMultiplierGreaterThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* left_shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Handles an arbitrary positive multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
-                        int* shift);
-
-// Splits a double input value into a returned fraction, and a shift value from
-// the exponent, using only bitwise and integer operations to support
-// microcontrollers and other environments without floating-point support.
-//
-// This is designed to be a replacement for how std::frexp() is used within the
-// QuantizeMultiplier() function, and so has a different signature than the
-// standard version, returning a 64-bit integer rather than a double. This
-// result has a maximum value of 1<<31, with the fraction expressed as a
-// proportion of that maximum.
-//
-// std::frexp() returns NaNs and infinities unmodified, but since we're
-// returning integers that can't represent those values, instead we return
-// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
-// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
-// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
-// result in return values that end up truncating some bits at the end,
-// reflecting the loss of precision inherent in denormalization.
-int64_t IntegerFrExp(double input, int* shift);
-
-// Converts an integer fraction in the format produced by IntegerFrExp (where
-// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
-// IEEE binary64 double format result. The implementation uses only integer and
-// bitwise operators, so no floating point hardware support or emulation is
-// needed. This is here so quantized operations can run non-time-critical
-// preparation calculations on microcontrollers and other platforms without
-// float support.
-double DoubleFromFractionAndShift(int64_t fraction, int shift);
-
-// Performs a multiplication of two numbers in double format, using only integer
-// and bitwise instructions. This is aimed at supporting housekeeping functions
-// for quantized operations on microcontrollers without floating-point hardware.
-double IntegerDoubleMultiply(double a, double b);
-
-// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
-// greater than b. It is implemented using only integer and logical instructions
-// so that it can be easily run on microcontrollers for quantized operations.
-int IntegerDoubleCompare(double a, double b);
-
-// This first creates a multiplier in a double equivalent of
-// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
-// precision in the double's fractional bits.  It then splits the result into
-// significand and exponent.
-void PreprocessSoftmaxScaling(double beta, double input_scale,
-                              int input_integer_bits,
-                              int32_t* quantized_multiplier, int* left_shift);
-// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
-void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
-                                    int input_integer_bits,
-                                    int32_t* quantized_multiplier,
-                                    int* left_shift,
-                                    int32_t* reverse_scaling_divisor,
-                                    int* reverse_scaling_left_shift);
-// Calculate the largest input that will result in a within-bounds intermediate
-// result within MultiplyByQuantizedMultiplierGreaterThanOne.  In other words,
-// it must not overflow before we reduce the value by multiplication by the
-// input multiplier.  The negative radius is used as the minimum difference in
-// Softmax.
-int CalculateInputRadius(int input_integer_bits, int input_left_shift,
-                         int total_signed_bits = 31);
-
-// Nudges a min/max quantization range to ensure zero is zero.
-// Gymnastics with nudged zero point is to ensure that real zero maps to
-// an integer, which is required for e.g. zero-padding in convolutional layers.
-// Outputs nudged_min, nudged_max, nudged_scale.
-void NudgeQuantizationRange(const float min, const float max,
-                            const int quant_min, const int quant_max,
-                            float* nudged_min, float* nudged_max,
-                            float* nudged_scale);
-
-// Fake quantizes (quantizes and dequantizes) input_data using the scale,
-// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
-// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
-void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
-                       const float nudged_max, const float* input_data,
-                       float* output_data, const float size);
-
-// If x is approximately a power of two (with any positive or negative
-// exponent), stores that exponent (i.e. log2(x)) in *log2_result and returns
-// true; otherwise returns false.
-bool CheckedLog2(const float x, int* log2_result);
-
-// Decomposes an array of double multipliers into a Q0.31 int32 representation
-// of its significand, and shift representation of its exponent.
-//
-// Handles an arbitrary multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void QuantizeMultiplierArray(const double* effective_scales, size_t size,
-                             int32_t* effective_scale_significand,
-                             int* effective_shift);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
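
As a worked example of the significand/shift decomposition declared above (illustrative, not part of the commit, and assuming the header is available):

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  int32_t significand = 0;
  int shift = 0;
  // 0.75 = 0.75 * 2^0, so the Q0.31 significand is round(0.75 * 2^31)
  // = 1610612736 (0x60000000) and the shift is 0.
  tflite::QuantizeMultiplier(0.75, &significand, &shift);
  std::printf("significand=%ld shift=%d\n", static_cast<long>(significand),
              shift);
}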

+ 0 - 400
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h

@@ -1,400 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
-
-#include <algorithm>
-#include <type_traits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  T activation_min, activation_max;
-  GetActivationParams(params, &activation_min, &activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] + input2_data[i], activation_min, activation_max);
-  }
-}
-
-// Element-wise add that can often be used for inner loop of broadcast add as
-// well as the non-broadcast add.
-
-// This function is used for 8-bit as well as for 16-bit, but the accumulator
-// is 32-bit for both cases. The overflow does not happen due to the
-// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
-template <typename T>
-inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
-  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
-  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
-  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
-
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-// Scalar-broadcast add that can be used for inner loop of more general
-// broadcast add, so that, for example, scalar-broadcast with batch will still
-// be fast.
-inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
-                               uint8_t input1_data, const uint8_t* input2_data,
-                               uint8_t* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-
-  const int32_t input1_val = params.input1_offset + input1_data;
-  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-  const int32_t scaled_input1_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input1_val, params.input1_multiplier, params.input1_shift);
-  for (int i = 0; i < size; ++i) {
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<uint8_t>(clamped_output);
-  }
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void AddGeneralParamScale(const ArithmeticParams& params,
-                                 const RuntimeShape& input1_shape,
-                                 const int16_t* input1_data,
-                                 const RuntimeShape& input2_shape,
-                                 const int16_t* input2_data,
-                                 const RuntimeShape& output_shape,
-                                 int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  int max_value = std::numeric_limits<int16_t>::max();
-
-  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
-  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
-  TFLITE_DCHECK_LT(params.input1_offset, max_value);
-  TFLITE_DCHECK_LT(params.input2_offset, max_value);
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int16_t* input1_data,
-                const RuntimeShape& input2_shape, const int16_t* input2_data,
-                const RuntimeShape& output_shape, int16_t* output_data,
-                bool pot_scale = true) {
-  if (!pot_scale) {
-    AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
-                         input2_data, output_shape, output_data);
-    return;
-  }
-
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-
-  const int input1_shift = params.input1_shift;
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  const int16_t output_activation_min = params.quantized_activation_min;
-  const int16_t output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
-  TFLITE_DCHECK_LE(input1_shift, 0);
-  TFLITE_DCHECK_LE(params.input2_shift, 0);
-  const int16_t* not_shift_input =
-      input1_shift == 0 ? input1_data : input2_data;
-  const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
-  const int input_right_shift =
-      input1_shift == 0 ? -params.input2_shift : -input1_shift;
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-
-    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
-    F0 scaled_input = F0::FromRaw(
-        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
-    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
-    const int16_t raw_output = result.raw();
-    const int16_t clamped_output = std::min(
-        output_activation_max, std::max(output_activation_min, raw_output));
-    output_data[i] = clamped_output;
-  }
-}
-
-template <typename T>
-inline typename std::enable_if<!is_small_integer<T>::value, void>::type
-BroadcastAdd4DSlow(const ArithmeticParams& params,
-                   const RuntimeShape& input1_shape, const T* input1_data,
-                   const RuntimeShape& input2_shape, const T* input2_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  T activation_min, activation_max;
-  GetActivationParams(params, &activation_min, &activation_max);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax<T>(
-                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
-                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  activation_min, activation_max);
-        }
-      }
-    }
-  }
-}
-
-// This function is used for 8-bit as well as for 16-bit, but the accumulator
-// is 32-bit for both cases. The overflow does not happen due to the
-// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
-template <typename T>
-inline typename std::enable_if<is_small_integer<T>::value, void>::type
-BroadcastAdd4DSlow(const ArithmeticParams& params,
-                   const RuntimeShape& input1_shape, const T* input1_data,
-                   const RuntimeShape& input2_shape, const T* input2_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t shifted_input1_val =
-              input1_val * (1 << params.left_shift);
-          const int32_t shifted_input2_val =
-              input2_val * (1 << params.left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, params.input1_multiplier,
-                  params.input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, params.input2_multiplier,
-                  params.input2_shift);
-          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-          const int32_t raw_output =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  raw_sum, params.output_multiplier, params.output_shift) +
-              params.output_offset;
-          const int32_t clamped_output =
-              std::min(params.quantized_activation_max,
-                       std::max(params.quantized_activation_min, raw_output));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
-                                 const RuntimeShape& unswitched_input1_shape,
-                                 const uint8_t* unswitched_input1_data,
-                                 const RuntimeShape& unswitched_input2_shape,
-                                 const uint8_t* unswitched_input2_data,
-                                 const RuntimeShape& output_shape,
-                                 uint8_t* output_data) {
-  ArithmeticParams switched_params = unswitched_params;
-  switched_params.input1_offset = unswitched_params.input2_offset;
-  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
-  switched_params.input1_shift = unswitched_params.input2_shift;
-  switched_params.input2_offset = unswitched_params.input1_offset;
-  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
-  switched_params.input2_shift = unswitched_params.input1_shift;
-
-  const bool use_unswitched =
-      unswitched_params.broadcast_category ==
-      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
-
-  const ArithmeticParams& params =
-      use_unswitched ? unswitched_params : switched_params;
-  const uint8_t* input1_data =
-      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
-  const uint8_t* input2_data =
-      use_unswitched ? unswitched_input2_data : unswitched_input1_data;
-
-  // Fivefold nested loops. The second input resets its position for each
-  // iteration of the second loop. The first input resets its position at the
-  // beginning of the fourth loop. The innermost loop is an elementwise add of
-  // sections of the arrays.
-  uint8_t* output_data_ptr = output_data;
-  const uint8_t* input1_data_ptr = input1_data;
-  const uint8_t* input2_data_reset = input2_data;
-  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
-  // between input shapes. y3 for input 1 is always broadcast, and so the
-  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
-  // Put another way,
-  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
-  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
-  int y0 = params.broadcast_shape[0];
-  int y1 = params.broadcast_shape[1];
-  int y2 = params.broadcast_shape[2];
-  int y3 = params.broadcast_shape[3];
-  int y4 = params.broadcast_shape[4];
-  if (y4 > 1) {
-    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
-    // dimension.
-    for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8_t* input2_data_ptr;
-      for (int i1 = 0; i1 < y1; ++i1) {
-        input2_data_ptr = input2_data_reset;
-        for (int i2 = 0; i2 < y2; ++i2) {
-          for (int i3 = 0; i3 < y3; ++i3) {
-            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
-                           output_data_ptr);
-            input2_data_ptr += y4;
-            output_data_ptr += y4;
-          }
-          // We have broadcast y4 of input1 data y3 times, and now move on.
-          input1_data_ptr += y4;
-        }
-      }
-      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
-      input2_data_reset = input2_data_ptr;
-    }
-  } else {
-    // Special case of y4 == 1, in which the innermost loop is a single element
-    // and can be combined with the next (y3) as an inner broadcast.
-    //
-    // Note that this handles the case of pure scalar broadcast when
-    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
-    // broadcast with batch (as y2 > 1).
-    //
-    // NOTE The process is the same as the above general case except simplified
-    // for y4 == 1 and the loop over y3 is contained within the
-    // AddScalarBroadcast function.
-    for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8_t* input2_data_ptr;
-      for (int i1 = 0; i1 < y1; ++i1) {
-        input2_data_ptr = input2_data_reset;
-        for (int i2 = 0; i2 < y2; ++i2) {
-          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
-                             output_data_ptr);
-          input2_data_ptr += y3;
-          output_data_ptr += y3;
-          input1_data_ptr += 1;
-        }
-      }
-      input2_data_reset = input2_data_ptr;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
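
A minimal float-path sketch for the reference Add above. Illustrative only; it assumes the removed headers are still on the include path, and the float overload only reads the activation clamp out of ArithmeticParams.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/add.h"

int main() {
  tflite::ArithmeticParams params;
  params.float_activation_min = -10.0f;  // activation clamp used by the float path
  params.float_activation_max = 10.0f;
  const tflite::RuntimeShape shape({1, 1, 1, 4});
  const float in1[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float in2[4] = {0.5f, 0.5f, 0.5f, 0.5f};
  float out[4];  // expected: 1.5 2.5 3.5 4.5
  tflite::reference_ops::Add(params, shape, in1, shape, in2, shape, out);
  for (float v : out) std::printf("%f\n", v);
}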

+ 0 - 86
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h

@@ -1,86 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_ops {
-
-// T is expected to be either float or int.
-template <typename T>
-inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
-                 const T* const* input_data, T* output_data) {
-  // All inputs and output should have the same shape, this is checked during
-  // Prepare stage.
-  const size_t size = input_shape.FlatSize();
-  for (size_t i = 0; i < size; ++i) {
-    T x = 0;
-    for (size_t j = 0; j < num_inputs; ++j) {
-      x += input_data[j][i];
-    }
-    output_data[i] = x;
-  }
-}
-
-inline void AddN(const ArithmeticParams& params,
-                 const RuntimeShape& input_shape, const size_t num_inputs,
-                 const int8_t* const* input_data, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // Input offset is negative input zero point. Activation tensors are
-  // asymmetric quantized so they span the full int8 range.
-  // All inputs should have same zero-point and scale, this is checked during
-  // Prepare stage.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-
-  // All inputs and output should have the same shape, this is checked during
-  // Prepare stage.
-  const size_t size = input_shape.FlatSize();
-  for (size_t i = 0; i < size; ++i) {
-    // accumulate in scaled_x before clamping to avoid overflow
-    const int32_t x = params.input1_offset;  // x = 0
-    const int32_t shifted_x = x * (1 << params.left_shift);
-    int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-        shifted_x, params.input1_multiplier, params.input1_shift);
-
-    for (size_t j = 0; j < num_inputs; ++j) {
-      const int32_t y = params.input1_offset + input_data[j][i];
-      const int32_t shifted_y = y * (1 << params.left_shift);
-      int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_y, params.input1_multiplier, params.input1_shift);
-      scaled_x += scaled_y;
-    }
-
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            scaled_x, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<int8_t>(clamped_output);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
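
Similarly, a short sketch of the unquantized AddN path above (illustrative, with the same include-path assumption); all inputs must share one shape, which the real kernel checks during its Prepare stage.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/add_n.h"

int main() {
  const tflite::RuntimeShape shape({1, 1, 1, 3});
  const float a[3] = {1.0f, 2.0f, 3.0f};
  const float b[3] = {10.0f, 20.0f, 30.0f};
  const float c[3] = {0.5f, 0.5f, 0.5f};
  const float* inputs[3] = {a, b, c};
  float out[3];  // expected: 11.5 22.5 33.5
  tflite::reference_ops::AddN(shape, 3, inputs, out);
  for (float v : out) std::printf("%f\n", v);
}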

+ 0 - 88
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h

@@ -1,88 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
-
-#include <functional>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
-  if (is_arg_max) {
-    return std::greater<T>();
-  } else {
-    return std::less<T>();
-  }
-}
-
-template <typename T1, typename T2, typename T3, typename Cmp>
-void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
-               const T3* input2_data, const RuntimeShape& output_shape,
-               T2* output_data, const Cmp& cmp) {
-  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
-  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
-                   output_shape.DimensionsCount());
-  int axis = input2_data[0];
-  if (axis < 0) {
-    axis += input1_shape.DimensionsCount();
-  }
-  const int axis_size = input1_shape.Dims(axis);
-
-  int outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
-    outer_size *= input1_shape.Dims(i);
-  }
-
-  int inner_size = 1;
-  const int dims_count = input1_shape.DimensionsCount();
-  for (int i = axis + 1; i < dims_count; ++i) {
-    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
-    inner_size *= input1_shape.Dims(i);
-  }
-  for (int outer = 0; outer < outer_size; ++outer) {
-    for (int inner = 0; inner < inner_size; ++inner) {
-      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
-      T2 min_max_index = 0;
-      for (int i = 1; i < axis_size; ++i) {
-        const auto& curr_value =
-            input1_data[(outer * axis_size + i) * inner_size + inner];
-        if (cmp(curr_value, min_max_value)) {
-          min_max_value = curr_value;
-          min_max_index = static_cast<T2>(i);
-        }
-      }
-      output_data[outer * inner_size + inner] = min_max_index;
-    }
-  }
-}
-
-template <typename T1, typename T2, typename T3>
-void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
-               const T3* input2_data, const RuntimeShape& output_shape,
-               T2* output_data, const bool is_arg_max) {
-  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
-            GetComparefunction<T1>(is_arg_max));
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
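
A sketch of an arg-max over the last axis with the reference kernel above. Illustrative only; the int32 axis and output types are an assumption of this example, not something the template requires.

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"

int main() {
  const tflite::RuntimeShape input_shape({2, 3});
  const tflite::RuntimeShape output_shape({2});  // one index per row
  const float input[6] = {0.1f, 0.9f, 0.3f,   // row 0: max at index 1
                          0.7f, 0.2f, 0.8f};  // row 1: max at index 2
  const int32_t axis[1] = {1};                // reduce along the last dimension
  int32_t output[2];
  tflite::reference_ops::ArgMinMax(input_shape, input, axis, output_shape,
                                   output, /*is_arg_max=*/true);
  std::printf("%d %d\n", static_cast<int>(output[0]),
              static_cast<int>(output[1]));
}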

+ 0 - 275
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h

@@ -1,275 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
-
-#include <algorithm>
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-namespace batch_matmul {
-
-// Determine which dimension is the broadcast dimension.
-inline int broadcast_dim(int lhs_dim, int rhs_dim) {
-  if (lhs_dim == rhs_dim) return lhs_dim;
-  if (lhs_dim == 1) return rhs_dim;
-  TFLITE_DCHECK_EQ(rhs_dim, 1);
-  return lhs_dim;
-}
-
-// Compute the "extent" for iterating on this dimension.
-// If we are broadcasting, then don't advance (i.e. return 0).
-inline int extent(const RuntimeShape& shape, int x) {
-  if (shape.Dims(x) == 1) {
-    return 0;
-  }
-  int prod = 1;
-  for (int i = x + 1; i < shape.DimensionsCount(); ++i) {
-    prod *= shape.Dims(i);
-  }
-  return prod;
-}
-
-}  // namespace batch_matmul
-
-template <typename Ta, typename Tb, typename Tout>
-inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
-                        const RuntimeShape& rhs_shape, const Tb* rhs_data,
-                        const RuntimeShape& output_shape, Tout* output_data) {
-  const RuntimeShape extended_lhs_shape =
-      RuntimeShape::ExtendedShape(5, lhs_shape);
-  const RuntimeShape extended_rhs_shape =
-      RuntimeShape::ExtendedShape(5, rhs_shape);
-
-  const int batch_dim0 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
-  const int batch_dim1 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
-  const int batch_dim2 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
-
-  const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
-  const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
-  const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
-  const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
-  const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
-  const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
-
-  // Set params for each matrix multiply.
-  const int lhs_rows = extended_lhs_shape.Dims(3);
-  const int rhs_cols = extended_rhs_shape.Dims(4);
-  const int accum_depth = extended_lhs_shape.Dims(4);
-
-  for (int b0 = 0; b0 < batch_dim0; ++b0) {
-    const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
-    const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
-    for (int b1 = 0; b1 < batch_dim1; ++b1) {
-      const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
-      const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
-      for (int b2 = 0; b2 < batch_dim2; ++b2) {
-        const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
-        const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
-        Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
-                                       b1 * batch_dim2 + b2) *
-                                          lhs_rows * rhs_cols;
-        for (int j = 0; j < rhs_cols; ++j) {
-          for (int i = 0; i < lhs_rows; ++i) {
-            Tout total = 0;
-            for (int k = 0; k < accum_depth; ++k) {
-              total += static_cast<Tout>(lhs_ptr2[accum_depth * i + k]) *
-                       static_cast<Tout>(rhs_ptr2[j * accum_depth + k]);
-            }
-            int idx = lhs_rows * j + i;
-            out_ptr[idx] = total;
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
-                        const RuntimeShape& rhs_shape, const int8_t* rhs_data,
-                        const float* scaling_factors,
-                        const int32_t* input_offset, int32_t* row_sums,
-                        const RuntimeShape& output_shape, float* output_data,
-                        bool* compute_row_sums) {
-  const RuntimeShape extended_lhs_shape =
-      RuntimeShape::ExtendedShape(5, lhs_shape);
-  const RuntimeShape extended_rhs_shape =
-      RuntimeShape::ExtendedShape(5, rhs_shape);
-
-  const int batch_dim0 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
-  const int batch_dim1 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
-  const int batch_dim2 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
-
-  const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
-  const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
-  const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
-  const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
-  const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
-  const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
-
-  // Set params for each matrix multiply.
-  const int lhs_rows = extended_lhs_shape.Dims(3);
-  const int rhs_cols = extended_rhs_shape.Dims(4);
-  const int accum_depth = extended_lhs_shape.Dims(4);
-
-  const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols;
-  const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols;
-  const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols;
-  const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows;
-  const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows;
-  const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows;
-
-  if (!compute_row_sums || *compute_row_sums) {
-    int num_weights_matrices = 1;
-    for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) {
-      num_weights_matrices *= extended_lhs_shape.Dims(i);
-    }
-    tensor_utils::ReductionSumVector(
-        lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth);
-    if (compute_row_sums) {
-      *compute_row_sums = false;
-    }
-  }
-
-  for (int b0 = 0; b0 < batch_dim0; ++b0) {
-    const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
-    const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
-    const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0);
-    const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0);
-    const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0);
-    for (int b1 = 0; b1 < batch_dim1; ++b1) {
-      const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
-      const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
-      const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1);
-      const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1);
-      const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1);
-      for (int b2 = 0; b2 < batch_dim2; ++b2) {
-        const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
-        const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
-        const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2);
-        const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2);
-        const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2);
-        float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
-                                        b1 * batch_dim2 + b2) *
-                                           lhs_rows * rhs_cols;
-        for (int j = 0; j < rhs_cols; ++j) {
-          const float batch_scaling_factor = scale_ptr2[j];
-          const float batch_offset = static_cast<float>(ioff_ptr2[j]);
-          for (int i = 0; i < lhs_rows; ++i) {
-            int32_t total = 0;
-            for (int k = 0; k < accum_depth; ++k) {
-              total +=
-                  lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k];
-            }
-            int32_t row_sum = woff_ptr2[i];
-            total -= row_sum * batch_offset;
-            int idx = lhs_rows * j + i;
-            out_ptr[idx] += batch_scaling_factor * total;
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename T, typename AccumT>
-inline void BatchMatMul(const FullyConnectedParams& params,
-                        const RuntimeShape& lhs_shape, const T* lhs_data,
-                        const RuntimeShape& rhs_shape, const T* rhs_data,
-                        const RuntimeShape& output_shape, T* output_data) {
-  const RuntimeShape extended_lhs_shape =
-      RuntimeShape::ExtendedShape(5, lhs_shape);
-  const RuntimeShape extended_rhs_shape =
-      RuntimeShape::ExtendedShape(5, rhs_shape);
-
-  const int batch_dim0 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
-  const int batch_dim1 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
-  const int batch_dim2 = batch_matmul::broadcast_dim(
-      extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
-
-  const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
-  const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
-  const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
-  const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
-  const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
-  const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
-
-  // Set params for each matrix multiply.
-  const int lhs_rows = extended_lhs_shape.Dims(3);
-  const int rhs_cols = extended_rhs_shape.Dims(4);
-  const int accum_depth = extended_lhs_shape.Dims(4);
-
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  for (int b0 = 0; b0 < batch_dim0; ++b0) {
-    const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
-    const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
-    for (int b1 = 0; b1 < batch_dim1; ++b1) {
-      const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
-      const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
-      for (int b2 = 0; b2 < batch_dim2; ++b2) {
-        const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
-        const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
-        T* out_ptr = output_data +
-                     ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
-                         lhs_rows * rhs_cols;
-
-        for (int j = 0; j < rhs_cols; ++j) {
-          for (int i = 0; i < lhs_rows; ++i) {
-            AccumT total = 0;
-            for (int k = 0; k < accum_depth; ++k) {
-              AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
-              AccumT rhs_val = rhs_ptr2[accum_depth * j + k];
-              total += (lhs_val + filter_offset) * (rhs_val + input_offset);
-            }
-            int32_t total_scaled = MultiplyByQuantizedMultiplier(
-                total, output_multiplier, output_shift);
-            total_scaled += output_offset;
-            total_scaled = std::max(total_scaled, output_activation_min);
-            total_scaled = std::min(total_scaled, output_activation_max);
-            const int idx = lhs_rows * j + i;
-            out_ptr[idx] = static_cast<T>(total_scaled);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
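
The overloads above index the right-hand side as [cols, depth] and write each batch's output with idx = lhs_rows * j + i, i.e. column-major, which is worth keeping in mind when lining up operands. To stay layout-agnostic, this illustrative sketch (same include-path assumption) reduces everything to a single dot product.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"

int main() {
  // One batch, a 1x3 LHS and a 3x1 RHS: the result is a plain dot product.
  const tflite::RuntimeShape lhs_shape({1, 1, 1, 1, 3});
  const tflite::RuntimeShape rhs_shape({1, 1, 1, 3, 1});
  const tflite::RuntimeShape out_shape({1, 1, 1, 1, 1});
  const float lhs[3] = {1.0f, 2.0f, 3.0f};
  const float rhs[3] = {4.0f, 5.0f, 6.0f};
  float out[1];  // expected: 1*4 + 2*5 + 3*6 = 32
  tflite::reference_ops::BatchMatMul(lhs_shape, lhs, rhs_shape, rhs,
                                     out_shape, out);
  std::printf("%f\n", out[0]);
}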

+ 0 - 101
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h

@@ -1,101 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
-
-#include <cmath>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-// TODO(b/135760455): Move this method anonymous namespace in a cc file.
-inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) {
-  if (shape.DimensionsCount() == 4) {
-    return shape;
-  }
-  RuntimeShape new_shape(4, 1);
-  new_shape.SetDim(0, shape.Dims(0));
-  new_shape.SetDim(1, shape.Dims(1));
-  new_shape.SetDim(3, shape.Dims(2));
-  return new_shape;
-}
-
-template <typename T>
-inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape,
-                           const T* input1_data,
-                           const RuntimeShape& unextended_input2_shape,
-                           const int32_t* block_shape_data,
-                           const RuntimeShape& unextended_input3_shape,
-                           const int32_t* crops_data,
-                           const RuntimeShape& unextended_output_shape,
-                           T* output_data) {
-  ruy::profiler::ScopeLabel label("BatchToSpaceND");
-  TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3);
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(),
-                   unextended_output_shape.DimensionsCount());
-
-  const RuntimeShape input1_shape =
-      ExtendShapeBatchToSpace(unextended_input1_shape);
-  const RuntimeShape output_shape =
-      ExtendShapeBatchToSpace(unextended_output_shape);
-
-  const int output_width = output_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_batch_size = output_shape.Dims(0);
-
-  const int depth = input1_shape.Dims(3);
-  const int input_width = input1_shape.Dims(2);
-  const int input_height = input1_shape.Dims(1);
-  const int input_batch_size = input1_shape.Dims(0);
-
-  const int block_shape_height = block_shape_data[0];
-  const int block_shape_width =
-      unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1;
-  const int crops_top = crops_data[0];
-  const int crops_left =
-      unextended_input1_shape.DimensionsCount() == 4 ? crops_data[2] : 0;
-  for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
-    const int out_batch = in_batch % output_batch_size;
-    const int spatial_offset = in_batch / output_batch_size;
-    for (int in_h = 0; in_h < input_height; ++in_h) {
-      const int out_h = in_h * block_shape_height +
-                        spatial_offset / block_shape_width - crops_top;
-      if (out_h < 0 || out_h >= output_height) {
-        continue;
-      }
-      for (int in_w = 0; in_w < input_width; ++in_w) {
-        const int out_w = in_w * block_shape_width +
-                          spatial_offset % block_shape_width - crops_left;
-
-        if (out_w < 0 || out_w >= output_width) {
-          continue;
-        }
-        T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
-        const T* in =
-            input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
-        memcpy(out, in, depth * sizeof(T));
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
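
Note (not part of the diff): BatchToSpaceND above maps every input batch to a spatial block in the output: out_batch = in_batch % output_batch_size, and the quotient selects the block row/column via block_shape. The tiny sketch below reproduces that index arithmetic for block_shape = {2, 2} with no crops; it is illustrative only.

    #include <cstdio>

    // Reproduces the BatchToSpaceND index mapping for a 4x1x1xC input and a
    // 1x2x2xC output (block_shape = {2, 2}, crops = 0), with in_h = in_w = 0.
    int main() {
      const int input_batch_size = 4, output_batch_size = 1;
      const int block_h = 2, block_w = 2, crops_top = 0, crops_left = 0;
      for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
        const int spatial_offset = in_batch / output_batch_size;
        const int out_h = 0 * block_h + spatial_offset / block_w - crops_top;
        const int out_w = 0 * block_w + spatial_offset % block_w - crops_left;
        std::printf("input batch %d -> output (h=%d, w=%d)\n", in_batch, out_h, out_w);
      }
      return 0;
    }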

+ 0 - 91
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h

@@ -1,91 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Also appears to duplicate MinimumMaximum.
-//
-// R: Result type. T1: Input 1 type. T2: Input 2 type.
-template <typename R, typename T1, typename T2>
-inline void BroadcastBinaryFunction4DSlow(
-    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
-    const RuntimeShape& unextended_output_shape, R* output_data,
-    R (*func)(T1, T2)) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-
-  const int* dims_data =
-      reinterpret_cast<const int*>(output_shape.DimsDataUpTo5D());
-  for (int b = 0; b < output_shape.Dims(0); ++b) {
-    int out_idx_b = b * dims_data[1];
-    int in_idx1_b = desc1.strides[0] * b;
-    int in_idx2_b = desc2.strides[0] * b;
-    for (int y = 0; y < output_shape.Dims(1); ++y) {
-      int out_idx_y = (out_idx_b + y) * dims_data[2];
-      int in_idx1_y = in_idx1_b + desc1.strides[1] * y;
-      int in_idx2_y = in_idx2_b + desc2.strides[1] * y;
-      for (int x = 0; x < output_shape.Dims(2); ++x) {
-        int out_idx_x = (out_idx_y + x) * dims_data[3];
-        int in1_idx = in_idx1_y + desc1.strides[2] * x;
-        int in2_idx = in_idx2_y + desc2.strides[2] * x;
-        for (int c = 0; c < output_shape.Dims(3); ++c) {
-          auto out_idx = out_idx_x + c;
-          auto in1_val = input1_data[in1_idx];
-          auto in2_val = input2_data[in2_idx];
-          output_data[out_idx] = func(in1_val, in2_val);
-          in1_idx += desc1.strides[3];
-          in2_idx += desc2.strides[3];
-        }
-      }
-    }
-  }
-}
-
-// R: Result type. T1: Input 1 type. T2: Input 2 type.
-template <typename R, typename T1, typename T2>
-inline void BinaryFunction(const RuntimeShape& input1_shape,
-                           const T1* input1_data,
-                           const RuntimeShape& input2_shape,
-                           const T2* input2_data,
-                           const RuntimeShape& output_shape, R* output_data,
-                           R (*func)(T1, T2)) {
-  const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = func(input1_data[i], input2_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
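
Note (not part of the diff): BinaryFunction above is the flat, non-broadcast path: it applies a plain function pointer elementwise over inputs of matching size. A self-contained sketch of that pattern over raw arrays follows (ApplyElementwise is an illustrative name, not a TFLite API).

    #include <cstdio>

    // Elementwise application of a function pointer, mirroring the removed
    // BinaryFunction's flat path.
    template <typename R, typename T1, typename T2>
    void ApplyElementwise(const T1* in1, const T2* in2, R* out, int n,
                          R (*func)(T1, T2)) {
      for (int i = 0; i < n; ++i) out[i] = func(in1[i], in2[i]);
    }

    int main() {
      const float a[3] = {1.f, -2.f, 3.f};
      const float b[3] = {0.5f, 4.f, -1.f};
      float c[3];
      ApplyElementwise<float, float, float>(
          a, b, c, 3, [](float x, float y) { return x * y; });
      std::printf("%g %g %g\n", c[0], c[1], c[2]);  // 0.5 -8 -3
      return 0;
    }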

+ 0 - 56
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h

@@ -1,56 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
-                   const RuntimeShape& input2_shape, const T* input2_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  // Gets data at the backward index i of the shape tensor. Returns 1 if the
-  // index is out of range.
-  auto get_shape_data = [](const RuntimeShape& shape, const T* data,
-                           int backward_idx) -> T {
-    int forward_idx = shape.FlatSize() - 1 - backward_idx;
-    if (forward_idx < 0) return 1;
-    return data[forward_idx];
-  };
-
-  int output_num_elements = output_shape.FlatSize();
-  for (int i = 0; i < output_num_elements; ++i) {
-    int backward_i = output_num_elements - 1 - i;
-    int shape1_i = get_shape_data(input1_shape, input1_data, i);
-    int shape2_i = get_shape_data(input2_shape, input2_data, i);
-    if (shape1_i == 1) {
-      output_data[backward_i] = shape2_i;
-    } else if (shape2_i == 1) {
-      output_data[backward_i] = shape1_i;
-    } else {
-      TFLITE_CHECK_EQ(shape1_i, shape2_i);
-      output_data[backward_i] = shape1_i;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
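
Note (not part of the diff): BroadcastArgs above walks both shape tensors from their last dimension, treats missing leading dimensions as 1, and requires matching sizes unless one side is 1. A small sketch of the same rule over std::vector (BroadcastShapes is an illustrative helper):

    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Right-aligned broadcast of two shapes: a missing dimension counts as 1,
    // and a dimension of 1 yields to the other side.
    std::vector<int> BroadcastShapes(const std::vector<int>& a,
                                     const std::vector<int>& b) {
      const size_t n = std::max(a.size(), b.size());
      std::vector<int> out(n);
      for (size_t i = 0; i < n; ++i) {  // i indexes from the last dimension
        const int da = i < a.size() ? a[a.size() - 1 - i] : 1;
        const int db = i < b.size() ? b[b.size() - 1 - i] : 1;
        assert(da == db || da == 1 || db == 1);
        out[n - 1 - i] = (da == 1) ? db : da;
      }
      return out;
    }

    int main() {
      const std::vector<int> s = BroadcastShapes({3, 1, 2}, {2, 1});  // {3, 2, 2}
      std::printf("%d %d %d\n", s[0], s[1], s[2]);
      return 0;
    }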

+ 0 - 97
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h

@@ -1,97 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/kernel_util.h"
-
-namespace tflite {
-namespace reference_ops {
-template <int N>
-void BroadcastImpl(const NdArrayDesc<N>& input_desc, const char* input_data,
-                   const NdArrayDesc<N>& output_desc, char* output_data,
-                   int indexes[N], int dim, const int last_broadcasting_dim,
-                   const int type_size) {
-  // Copy data from input to output.
-  if (dim == last_broadcasting_dim) {
-    int copy_size = output_desc.strides[dim] * type_size;
-    const char* data_src =
-        input_data + SubscriptToIndex(input_desc, indexes) * type_size;
-    char* data_dst =
-        output_data + SubscriptToIndex(output_desc, indexes) * type_size;
-    for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
-      memcpy(data_dst, data_src, copy_size);
-    }
-    return;
-  }
-
-  // Recursive call to find the next broadcasting dimension.
-  for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim];
-       ++indexes[dim]) {
-    BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes,
-                     dim + 1, last_broadcasting_dim, type_size);
-  }
-
-  // Duplicate data in output tensor.
-  indexes[dim] = 0;
-  if (input_desc.extents[dim] != output_desc.extents[dim]) {
-    int copy_size = output_desc.strides[dim] * type_size;
-    char* data_src =
-        output_data + SubscriptToIndex(output_desc, indexes) * type_size;
-    char* data_dst = data_src + copy_size;
-    for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
-      memcpy(data_dst, data_src, copy_size);
-    }
-  }
-}
-
-template <int N>
-inline void BroadcastTo(const RuntimeShape& unextended_input_shape,
-                        const char* input_data,
-                        const RuntimeShape& unextended_output_shape,
-                        char* output_data, TfLiteType data_type) {
-  NdArrayDesc<N> input_desc;
-  NdArrayDesc<N> output_desc;
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
-                 &input_desc);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                 &output_desc);
-
-  // Find the last dimension that has broadcasting. At this dimension, the data
-  // is copied from the input tensor to the output tensor.
-  int last_broadcast_dim = -1;
-  for (int i = N - 1; i >= 0; --i) {
-    if (input_desc.extents[i] != output_desc.extents[i]) {
-      last_broadcast_dim = i;
-      break;
-    }
-  }
-
-  // If non-broadcasting, just copy data from input to output tensor.
-  if (last_broadcast_dim == -1) {
-    memcpy(output_data, input_data,
-           unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type));
-    return;
-  }
-
-  // Broadcasting using memcpy.
-  int indexes[N] = {0};
-  BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes, 0,
-                   last_broadcast_dim, TfLiteTypeGetSize(data_type));
-}
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
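
Note (not part of the diff): BroadcastTo above copies whole contiguous runs with memcpy at the innermost broadcasting dimension and then duplicates them along outer broadcasting dimensions. The minimal sketch below shows the same memcpy-based duplication for a 1x3 row broadcast to 4x3 (illustrative only):

    #include <cstdio>
    #include <cstring>

    // Broadcast a single row into several rows by repeated memcpy, the core
    // trick behind the removed BroadcastTo.
    int main() {
      const float row[3] = {1.f, 2.f, 3.f};
      float out[4][3];
      for (int r = 0; r < 4; ++r) {
        std::memcpy(out[r], row, sizeof(row));
      }
      std::printf("%g %g %g\n", out[3][0], out[3][1], out[3][2]);  // 1 2 3
      return 0;
    }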

+ 0 - 37
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h

@@ -1,37 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = std::ceil(input_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_

+ 0 - 280
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h

@@ -1,280 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline bool EqualFn(T lhs, T rhs) {
-  return lhs == rhs;
-}
-
-template <typename T>
-inline bool NotEqualFn(T lhs, T rhs) {
-  return lhs != rhs;
-}
-
-template <typename T>
-inline bool GreaterFn(T lhs, T rhs) {
-  return lhs > rhs;
-}
-template <typename T>
-inline bool GreaterEqualFn(T lhs, T rhs) {
-  return lhs >= rhs;
-}
-template <typename T>
-inline bool LessFn(T lhs, T rhs) {
-  return lhs < rhs;
-}
-template <typename T>
-inline bool LessEqualFn(T lhs, T rhs) {
-  return lhs <= rhs;
-}
-
-template <typename T>
-using ComparisonFn = bool (*)(T, T);
-
-template <typename T, ComparisonFn<T> F>
-inline void ComparisonImpl(
-    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    output_data[i] = F(input1_data[i], input2_data[i]);
-  }
-}
-
-template <ComparisonFn<float> F>
-inline void Comparison(const ComparisonParams& op_params,
-                       const RuntimeShape& input1_shape,
-                       const float* input1_data,
-                       const RuntimeShape& input2_shape,
-                       const float* input2_data,
-                       const RuntimeShape& output_shape, bool* output_data) {
-  ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
-                           input2_data, output_shape, output_data);
-}
-
-template <typename T, ComparisonFn<int32_t> F>
-inline void ComparisonWithScaling(
-    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
-  int left_shift = op_params.left_shift;
-  int32_t input1_offset = op_params.input1_offset;
-  int32_t input1_multiplier = op_params.input1_multiplier;
-  int input1_shift = op_params.input1_shift;
-  int32_t input2_offset = op_params.input2_offset;
-  int32_t input2_multiplier = op_params.input2_multiplier;
-  int input2_shift = op_params.input2_shift;
-
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    const int32_t input1_val = input1_offset + input1_data[i];
-    const int32_t input2_val = input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, input1_multiplier, input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, input2_multiplier, input2_shift);
-    output_data[i] = F(scaled_input1_val, scaled_input2_val);
-  }
-}
-
-struct BroadcastComparison4DSlowCommon {
-  const RuntimeShape output_shape;
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-};
-
-inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
-    const RuntimeShape& unextended_input1_shape,
-    const RuntimeShape& unextended_input2_shape,
-    const RuntimeShape& unextended_output_shape) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
-          desc2};
-}
-
-template <typename T, ComparisonFn<T> F>
-inline void BroadcastComparison4DSlowImpl(
-    const ComparisonParams& op_params,
-    const RuntimeShape& unextended_input1_shape, const T* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T* input2_data,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          output_data[Offset(dims.output_shape, b, y, x, c)] =
-              F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
-                input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
-        }
-      }
-    }
-  }
-}
-
-template <ComparisonFn<float> F>
-inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
-                                      const RuntimeShape& input1_shape,
-                                      const float* input1_data,
-                                      const RuntimeShape& input2_shape,
-                                      const float* input2_data,
-                                      const RuntimeShape& output_shape,
-                                      bool* output_data) {
-  BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
-                                          input2_shape, input2_data,
-                                          output_shape, output_data);
-}
-
-template <typename T, ComparisonFn<int32_t> F>
-inline void BroadcastComparison4DSlowWithScaling(
-    const ComparisonParams& op_params,
-    const RuntimeShape& unextended_input1_shape, const T* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T* input2_data,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  int left_shift = op_params.left_shift;
-  int32_t input1_offset = op_params.input1_offset;
-  int32_t input1_multiplier = op_params.input1_multiplier;
-  int input1_shift = op_params.input1_shift;
-  int32_t input2_offset = op_params.input2_offset;
-  int32_t input2_multiplier = op_params.input2_multiplier;
-  int input2_shift = op_params.input2_shift;
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              input1_offset +
-              input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
-          const int32_t input2_val =
-              input2_offset +
-              input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
-          const int32_t shifted_input1_val = input1_val * (1 << left_shift);
-          const int32_t shifted_input2_val = input2_val * (1 << left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, input1_multiplier, input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, input2_multiplier, input2_shift);
-          output_data[Offset(dims.output_shape, b, y, x, c)] =
-              F(scaled_input1_val, scaled_input2_val);
-        }
-      }
-    }
-  }
-}
-
-#define TFLITE_COMPARISON_OP(name)                                             \
-  inline void name(const ComparisonParams& op_params,                          \
-                   const RuntimeShape& input1_shape, const float* input1_data, \
-                   const RuntimeShape& input2_shape, const float* input2_data, \
-                   const RuntimeShape& output_shape, bool* output_data) {      \
-    Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape,   \
-                         input2_data, output_shape, output_data);              \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name##NoScaling(                                                 \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data,          \
-                                input2_shape, input2_data, output_shape,       \
-                                output_data);                                  \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name##WithScaling(                                               \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data,   \
-                                       input2_shape, input2_data,              \
-                                       output_shape, output_data);             \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast4DSlow##name##NoScaling(                                \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlowImpl<T, name##Fn>(                                \
-        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
-        output_shape, output_data);                                            \
-  }                                                                            \
-  inline void Broadcast4DSlow##name(                                           \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const float* input1_data, const RuntimeShape& input2_shape,              \
-      const float* input2_data, const RuntimeShape& output_shape,              \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data,  \
-                                        input2_shape, input2_data,             \
-                                        output_shape, output_data);            \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast4DSlow##name##WithScaling(                              \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlowWithScaling<T, name##Fn>(                         \
-        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
-        output_shape, output_data);                                            \
-  }
-TFLITE_COMPARISON_OP(Equal);
-TFLITE_COMPARISON_OP(NotEqual);
-TFLITE_COMPARISON_OP(Greater);
-TFLITE_COMPARISON_OP(GreaterEqual);
-TFLITE_COMPARISON_OP(Less);
-TFLITE_COMPARISON_OP(LessEqual);
-#undef TFLITE_COMPARISON_OP
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
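
Note (not part of the diff): the WithScaling comparison variants above rescale both quantized inputs onto a common intermediate scale before applying the comparison, so operands quantized with different parameters compare correctly. A hedged sketch of the underlying idea, using a float dequantization in place of the kernel's fixed-point rescale (QuantizedLess is an illustrative name):

    #include <cstdint>
    #include <cstdio>

    // Compare two quantized values by first mapping them back to real values.
    // The removed kernel achieves the same effect with fixed-point multipliers.
    bool QuantizedLess(int8_t a, int32_t a_zero, float a_scale,
                       int8_t b, int32_t b_zero, float b_scale) {
      const float real_a = (a - a_zero) * a_scale;
      const float real_b = (b - b_zero) * b_scale;
      return real_a < real_b;
    }

    int main() {
      // 0.5 (scale 0.1, zero point 0) vs 0.3 (scale 0.05, zero point 0).
      std::printf("%d\n", QuantizedLess(5, 0, 0.1f, 6, 0, 0.05f));  // prints 0
      return 0;
    }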

+ 0 - 141
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h

@@ -1,141 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename Scalar>
-inline void Concatenation(const ConcatenationParams& params,
-                          const RuntimeShape* const* input_shapes,
-                          const Scalar* const* input_data,
-                          const RuntimeShape& output_shape,
-                          Scalar* output_data) {
-  int axis = params.axis;
-  int inputs_count = params.inputs_count;
-  const int concat_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LT(axis, concat_dimensions);
-
-  int64_t concat_size = 0;
-  for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
-    for (int j = 0; j < concat_dimensions; j++) {
-      if (j != axis) {
-        MatchingDim(*input_shapes[i], j, output_shape, j);
-      }
-    }
-    concat_size += input_shapes[i]->Dims(axis);
-  }
-  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
-  int64_t outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= output_shape.Dims(i);
-  }
-  // For all input arrays,
-  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
-  int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < concat_dimensions; ++i) {
-    base_inner_size *= output_shape.Dims(i);
-  }
-
-  Scalar* output_ptr = output_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
-      const Scalar* input_ptr = input_data[i] + k * copy_size;
-      memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
-      output_ptr += copy_size;
-    }
-  }
-}
-
-// TODO(b/174275780): The quantized implementation of concatenation isn't fully
-// quantized as it takes scale as a floating point value. This should be fixed
-// when optimizing this routine further.
-inline void ConcatenationWithScaling(const ConcatenationParams& params,
-                                     const RuntimeShape* const* input_shapes,
-                                     const uint8_t* const* input_data,
-                                     const RuntimeShape& output_shape,
-                                     uint8_t* output_data) {
-  int axis = params.axis;
-  const int32_t* input_zeropoint = params.input_zeropoint;
-  const float* input_scale = params.input_scale;
-  int inputs_count = params.inputs_count;
-  const int32_t output_zeropoint = params.output_zeropoint;
-  const float output_scale = params.output_scale;
-
-  const int concat_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LT(axis, concat_dimensions);
-
-  int64_t concat_size = 0;
-  for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
-    for (int j = 0; j < concat_dimensions; j++) {
-      if (j != axis) {
-        MatchingDim(*input_shapes[i], j, output_shape, j);
-      }
-    }
-    concat_size += input_shapes[i]->Dims(axis);
-  }
-  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
-  int64_t outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= output_shape.Dims(i);
-  }
-  // For all input arrays,
-  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
-  int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < concat_dimensions; ++i) {
-    base_inner_size *= output_shape.Dims(i);
-  }
-
-  const float inverse_output_scale = 1.f / output_scale;
-  uint8_t* output_ptr = output_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
-      const uint8_t* input_ptr = input_data[i] + k * copy_size;
-      if (input_zeropoint[i] == output_zeropoint &&
-          input_scale[i] == output_scale) {
-        memcpy(output_ptr, input_ptr, copy_size);
-      } else {
-        const float scale = input_scale[i] * inverse_output_scale;
-        const float bias = -input_zeropoint[i] * scale;
-        for (int j = 0; j < copy_size; ++j) {
-          const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
-                                    input_ptr[j] * scale + bias)) +
-                                output_zeropoint;
-          output_ptr[j] = static_cast<uint8_t>(
-              std::max<int32_t>(std::min<int32_t>(255, value), 0));
-        }
-      }
-      output_ptr += copy_size;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
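
Note (not part of the diff): ConcatenationWithScaling above copies a slice verbatim when an input shares the output's scale and zero point, and otherwise requantizes each value as round(q * s_in/s_out - zp_in * s_in/s_out) + zp_out, clamped to [0, 255]. A standalone sketch of that per-value step (Requantize is an illustrative name):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Requantize one uint8 value from (s_in, zp_in) to (s_out, zp_out), as in
    // the mismatched-quantization branch of the removed kernel.
    uint8_t Requantize(uint8_t q_in, int32_t zp_in, float s_in,
                       int32_t zp_out, float s_out) {
      const float scale = s_in / s_out;
      const float bias = -zp_in * scale;
      const int32_t v =
          static_cast<int32_t>(std::round(q_in * scale + bias)) + zp_out;
      return static_cast<uint8_t>(std::max(0, std::min(255, v)));
    }

    int main() {
      // Input scale 0.02, zero point 128; output scale 0.04, zero point 128.
      std::printf("%d\n", Requantize(200, 128, 0.02f, 128, 0.04f));  // 164
      return 0;
    }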

+ 0 - 287
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h

@@ -1,287 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
-                 const float* input_data, const RuntimeShape& filter_shape,
-                 const float* filter_data, const RuntimeShape& bias_shape,
-                 const float* bias_data, const RuntimeShape& output_shape,
-                 float* output_data, const RuntimeShape& im2col_shape,
-                 float* im2col_data) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          float total = 0.f;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                float input_value =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                float filter_value = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                total += (input_value * filter_value);
-              }
-            }
-          }
-          float bias_value = 0.0f;
-          if (bias_data) {
-            bias_value = bias_data[out_channel];
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              ActivationFunctionWithMinMax(total + bias_value,
-                                           output_activation_min,
-                                           output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
-                 const uint8_t* input_data, const RuntimeShape& filter_shape,
-                 const uint8_t* filter_data, const RuntimeShape& bias_shape,
-                 const int32_t* bias_data, const RuntimeShape& output_shape,
-                 uint8_t* output_data, const RuntimeShape& im2col_shape,
-                 uint8_t* im2col_data, void* cpu_backend_context) {
-  (void)cpu_backend_context;  // only used in optimized code.
-  (void)im2col_data;          // only used in optimized code.
-  (void)im2col_shape;         // only used in optimized code.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                int32_t input_val =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                int32_t filter_val = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                acc +=
-                    (filter_val + filter_offset) * (input_val + input_offset);
-              }
-            }
-          }
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<uint8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-inline void HybridConvPerChannel(
-    const ConvParams& params, float* scaling_factors_ptr,
-    const RuntimeShape& input_shape, const int8_t* input_data,
-    const RuntimeShape& filter_shape, const int8_t* filter_data,
-    const RuntimeShape& bias_shape, const float* bias_data,
-    const RuntimeShape& output_shape, float* output_data,
-    const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    const float* per_channel_scale, int32_t* input_offset) {
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x,
-                      in_channel + group * filter_input_depth)];
-                  int32_t filter_val =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  acc += filter_val * (input_val - input_offset[batch]);
-                }
-              }
-            }
-          }
-          float acc_float =
-              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
-          if (bias_data) {
-            acc_float += bias_data[out_channel];
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              ActivationFunctionWithMinMax(acc_float, output_activation_min,
-                                           output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
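
Note (not part of the diff): all three Conv variants above share the same nested loop: for every output element, slide the filter over the (possibly dilated, padded) input window and accumulate input x filter products, with group = out_channel / filters_per_group selecting which input-channel slice is read. The tiny sketch below computes one output element of the float path for a 3x3 input and 2x2 filter, stride 1, no padding (illustrative only):

    #include <cstdio>

    // One output element of a float convolution at (out_y, out_x) = (0, 0),
    // single input/output channel, matching the accumulation in the removed Conv.
    int main() {
      const float input[3][3] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
      const float filter[2][2] = {{1, 0}, {0, 1}};
      float total = 0.f;
      for (int fy = 0; fy < 2; ++fy) {
        for (int fx = 0; fx < 2; ++fx) {
          total += input[0 + fy][0 + fx] * filter[fy][fx];
        }
      }
      std::printf("output(0,0) = %g\n", total);  // 1*1 + 5*1 = 6
      return 0;
    }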

+ 0 - 175
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h

@@ -1,175 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
-
-#include <algorithm>
-#include <cstdint>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
-                   bool exclusive, bool reverse, T* output_data) {
-  const int32_t rank = shape.DimensionsCount();
-  TFLITE_DCHECK_GE(rank, 1);
-  TFLITE_DCHECK_GE(axis, 0);
-  TFLITE_DCHECK_LT(axis, rank);
-
-  size_t inner = 1;
-  size_t outer = 1;
-  size_t depth = 1;
-  for (int32_t i = 0; i < rank; i++) {
-    if (i < axis)
-      inner *= shape.Dims(i);
-    else if (i > axis)
-      outer *= shape.Dims(i);
-    else
-      depth = shape.Dims(i);
-  }
-
-  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
-    size_t outer_index_adj;
-    if (reverse)
-      outer_index_adj = (outer - 1) - outer_index;
-    else
-      outer_index_adj = outer_index;
-    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
-      T accumulator = 0;
-      size_t inner_index_adj;
-      if (reverse)
-        inner_index_adj = (inner - 1) - inner_index;
-      else
-        inner_index_adj = inner_index;
-      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
-        size_t depth_index_adj;
-        if (reverse)
-          depth_index_adj = (depth - 1) - depth_index;
-        else
-          depth_index_adj = depth_index;
-
-        size_t index = outer_index_adj;
-        index += inner_index_adj * depth * outer;
-        index += depth_index_adj * outer;
-
-        if (exclusive) {
-          output_data[index] = accumulator;
-          accumulator += input_data[index];
-        } else {
-          accumulator += input_data[index];
-          output_data[index] = accumulator;
-        }
-      }
-    }
-  }
-}
-
-//
-// Quantized INT8 CUMSUM
-//
-inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
-                   const RuntimeShape& shape, int32_t axis, bool exclusive,
-                   bool reverse, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // Input offset is negative input zero point. Activation tensors are
-  // asymmetric quantized so they span the full int8 range.
-  // All inputs should have same zero-point and scale, this is checked during
-  // Prepare stage.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-
-  const int32_t rank = shape.DimensionsCount();
-  TFLITE_DCHECK_GE(rank, 1);
-  TFLITE_DCHECK_GE(axis, 0);
-  TFLITE_DCHECK_LT(axis, rank);
-
-  size_t inner = 1;
-  size_t outer = 1;
-  size_t depth = 1;
-  for (int32_t i = 0; i < rank; i++) {
-    if (i < axis)
-      inner *= shape.Dims(i);
-    else if (i > axis)
-      outer *= shape.Dims(i);
-    else
-      depth = shape.Dims(i);
-  }
-
-  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
-    size_t outer_index_adj;
-    if (reverse)
-      outer_index_adj = (outer - 1) - outer_index;
-    else
-      outer_index_adj = outer_index;
-    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
-      int32_t accumulator = params.input1_offset;  // accumulator = 0
-      accumulator *= (1 << params.left_shift);
-      accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          accumulator, params.input1_multiplier, params.input1_shift);
-
-      size_t inner_index_adj;
-      if (reverse)
-        inner_index_adj = (inner - 1) - inner_index;
-      else
-        inner_index_adj = inner_index;
-
-      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
-        size_t depth_index_adj;
-        if (reverse)
-          depth_index_adj = (depth - 1) - depth_index;
-        else
-          depth_index_adj = depth_index;
-
-        size_t index = outer_index_adj;
-        index += inner_index_adj * depth * outer;
-        index += depth_index_adj * outer;
-
-        const int32_t y = params.input1_offset + input_data[index];
-        const int32_t shifted_y = y * (1 << params.left_shift);
-        const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_y, params.input1_multiplier, params.input1_shift);
-
-        int32_t scaled_output;
-        if (exclusive) {
-          scaled_output = accumulator;
-          accumulator += scaled_y;
-        } else {
-          accumulator += scaled_y;
-          scaled_output = accumulator;
-        }
-
-        const int32_t raw_output =
-            MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                scaled_output, params.output_multiplier, params.output_shift) +
-            params.output_offset;
-        const int32_t clamped_output =
-            std::min(params.quantized_activation_max,
-                     std::max(params.quantized_activation_min, raw_output));
-        output_data[index] = static_cast<int8_t>(clamped_output);
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
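
Note (not part of the diff): the removed CumSum walks the selected axis keeping a running accumulator; exclusive mode writes the accumulator before adding the current element, inclusive mode after, and reverse simply flips the traversal order. A 1-D sketch of the two modes (CumSum1D is an illustrative name):

    #include <cstdio>

    // Inclusive vs. exclusive cumulative sum along one axis (no reverse).
    void CumSum1D(const int* in, int* out, int n, bool exclusive) {
      int acc = 0;
      for (int i = 0; i < n; ++i) {
        if (exclusive) { out[i] = acc; acc += in[i]; }
        else           { acc += in[i]; out[i] = acc; }
      }
    }

    int main() {
      const int x[4] = {1, 2, 3, 4};
      int inc[4], exc[4];
      CumSum1D(x, inc, 4, false);  // 1 3 6 10
      CumSum1D(x, exc, 4, true);   // 0 1 3 6
      std::printf("%d %d %d %d | %d %d %d %d\n",
                  inc[0], inc[1], inc[2], inc[3], exc[0], exc[1], exc[2], exc[3]);
      return 0;
    }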

+ 0 - 79
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h

@@ -1,79 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
-                         const RuntimeShape& unextended_input_shape,
-                         const T* input_data,
-                         const RuntimeShape& unextended_output_shape,
-                         T* output_data) {
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  const int input_depth = input_shape.Dims(3);
-  const int input_width = input_shape.Dims(2);
-  const int input_height = input_shape.Dims(1);
-  const int input_batch = input_shape.Dims(0);
-
-  const int output_depth = output_shape.Dims(3);
-  const int output_width = output_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_batch = output_shape.Dims(0);
-
-  const int32_t block_size = op_params.block_size;
-
-  TFLITE_DCHECK_EQ(input_width * block_size, output_width);
-  TFLITE_DCHECK_EQ(input_height * block_size, output_height);
-  TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
-  TFLITE_DCHECK_EQ(input_batch, output_batch);
-
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_h = 0; out_h < output_height; ++out_h) {
-      for (int out_w = 0; out_w < output_width; ++out_w) {
-        for (int out_d = 0; out_d < output_depth; ++out_d) {
-          const int in_d =
-              out_d + ((out_h % block_size) * block_size + out_w % block_size) *
-                          output_depth;
-
-          const int in_w = out_w / block_size;
-          const int in_h = out_h / block_size;
-          const int in_b = out_b;
-
-          const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
-          const int output_index =
-              Offset(output_shape, out_b, out_h, out_w, out_d);
-
-          output_data[output_index] = input_data[input_index];
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
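
For reference, a minimal standalone sketch of the index remapping the removed DepthToSpace kernel performs, using an assumed 1x1x1x4 input and block_size = 2 (all names and values here are illustrative only, not part of the library):

#include <cstdio>

// Remap a 1x1x1x4 tensor (NHWC) to 1x2x2x1 with block_size = 2, mirroring the
// in_d arithmetic of the removed reference kernel (in_h and in_w are 0 here).
int main() {
  const int block_size = 2;
  const int output_depth = 1;
  const float input[4] = {10.f, 20.f, 30.f, 40.f};  // four depth channels
  float output[4] = {};                              // one 2x2 spatial block
  for (int out_h = 0; out_h < 2; ++out_h) {
    for (int out_w = 0; out_w < 2; ++out_w) {
      const int in_d =
          ((out_h % block_size) * block_size + out_w % block_size) * output_depth;
      output[out_h * 2 + out_w] = input[in_d];
    }
  }
  std::printf("%g %g %g %g\n", output[0], output[1], output[2], output[3]);
  // Prints "10 20 30 40": the depth channels become a 2x2 spatial block.
  return 0;
}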

+ 0 - 100
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h

@@ -1,100 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void DepthwiseConv(
-    const DepthwiseParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& filter_shape,
-    const float* filter_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int b = 0; b < batches; ++b) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int ic = 0; ic < input_depth; ++ic) {
-          for (int m = 0; m < depth_multiplier; m++) {
-            const int oc = m + ic * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            float total = 0.f;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  float input_value =
-                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
-                  float filter_value = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, oc)];
-                  total += (input_value * filter_value);
-                }
-              }
-            }
-            float bias_value = 0.0f;
-            if (bias_data) {
-              bias_value = bias_data[oc];
-            }
-            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
-                ActivationFunctionWithMinMax(total + bias_value,
-                                             output_activation_min,
-                                             output_activation_max);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
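
As a worked example of the accumulate-then-clamp pattern in the removed float kernel, here is a standalone sketch of a single output element with one input channel, depth_multiplier = 1, a 2x2 filter, stride 1 and no padding (all values are invented for illustration):

#include <algorithm>
#include <cstdio>

int main() {
  const float input[2][2]  = {{1.f, 2.f}, {3.f, 4.f}};
  const float filter[2][2] = {{0.5f, 0.5f}, {0.5f, 0.5f}};
  const float bias = 1.f;
  const float act_min = 0.f, act_max = 6.f;  // ReLU6-style activation range
  float total = 0.f;
  for (int fy = 0; fy < 2; ++fy)
    for (int fx = 0; fx < 2; ++fx)
      total += input[fy][fx] * filter[fy][fx];
  // (1 + 2 + 3 + 4) * 0.5 + 1 = 6, which stays inside [0, 6] after clamping.
  const float out = std::min(act_max, std::max(act_min, total + bias));
  std::printf("out = %g\n", out);
  return 0;
}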

+ 0 - 319
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h

@@ -1,319 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
-
-#include <algorithm>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// Used in tests and template parameters to control which version of depthwise
-// convolution is called. Primarily for reference code, and specializations
-// forced in tests.
-enum class DepthwiseConvImplementation {
-  // Run all tests against kUseStandardEntry even if also testing another
-  // kernel, since we need to be sure that the main DepthwiseConv() function in
-  // optimized_ops.h dispatches to a correctly-executing kernel.
-  kNone = 0,                 // The "default" option: use the normal
-                             // DepthwiseConv kernel (entry) function.
-  kUseGenericKernel,         // Forced use of generic kernel.
-  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
-  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
-                             // when available.
-  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model that is intended
-                             // to match the overall design of the NEON code.
-  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound loops
-                             // and some arrays.
-  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
-};
-
-// Category of depthwise convolution output rounding.
-enum class DepthwiseConvOutputRounding {
-  kNone = 0,      // Invalid: specific method must be specified.
-  kAwayFromZero,  // Original method: exact halves rounded away from zero.
-  kUpward,        // Halves towards +infinity: adds 0.5 before truncate.
-  // This is where a future kNearestEven would be placed.
-};
-
-// Category of depthwise convolution depth multiplication.
-enum class DepthwiseConvDepthMultiplication {
-  kNoMultiplication = 0,  // Depth multiplier = 1.
-  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
-};
-
-namespace reference_ops {
-namespace depthwise_conv {
-
-template <DepthwiseConvOutputRounding output_rounding>
-inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
-                                  int shift) {
-  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-// Single-rounding MultiplyByQuantizedMultiplier
-#if TFLITE_SINGLE_ROUNDING
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  int left_shift = shift > 0 ? shift : 0;
-  int right_shift = shift > 0 ? 0 : -shift;
-  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
-                                 x * (1 << left_shift), quantized_multiplier),
-                             right_shift);
-}
-
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-// Double-rounding MultiplyByQuantizedMultiplier
-#else
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  const int left_shift = shift > 0 ? shift : 0;
-  const int right_shift = shift > 0 ? 0 : -shift;
-  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
-  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
-                                            quantized_multiplier) +
-          rounding_offset) >>
-         right_shift;
-}
-#endif  // TFLITE_SINGLE_ROUNDING
-
-template <DepthwiseConvOutputRounding output_rounding>
-struct DepthwiseConvBasicKernel {
-  static inline void Run(
-      const DepthwiseParams& params, const RuntimeShape& input_shape,
-      const uint8_t* input_data, const RuntimeShape& filter_shape,
-      const uint8_t* filter_data, const RuntimeShape& bias_shape,
-      const int32_t* bias_data, const RuntimeShape& output_shape,
-      uint8_t* output_data) {
-    const int stride_width = params.stride_width;
-    const int stride_height = params.stride_height;
-    const int dilation_width_factor = params.dilation_width_factor;
-    const int dilation_height_factor = params.dilation_height_factor;
-    const int pad_width = params.padding_values.width;
-    const int pad_height = params.padding_values.height;
-    const int depth_multiplier = params.depth_multiplier;
-    const int32_t output_activation_min = params.quantized_activation_min;
-    const int32_t output_activation_max = params.quantized_activation_max;
-    const int32_t input_offset = params.input_offset;
-    const int32_t filter_offset = params.weights_offset;
-    const int32_t output_offset = params.output_offset;
-    const int32_t output_multiplier = params.output_multiplier;
-    const int output_shift = params.output_shift;
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-    const int input_height = input_shape.Dims(1);
-    const int input_width = input_shape.Dims(2);
-    const int input_depth = input_shape.Dims(3);
-    const int filter_height = filter_shape.Dims(1);
-    const int filter_width = filter_shape.Dims(2);
-    const int output_height = output_shape.Dims(1);
-    const int output_width = output_shape.Dims(2);
-    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-    for (int b = 0; b < batches; ++b) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int ic = 0; ic < input_depth; ++ic) {
-            for (int m = 0; m < depth_multiplier; m++) {
-              const int oc = m + ic * depth_multiplier;
-              const int in_x_origin = (out_x * stride_width) - pad_width;
-              const int in_y_origin = (out_y * stride_height) - pad_height;
-              int32_t acc = 0;
-              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                  const int in_x =
-                      in_x_origin + dilation_width_factor * filter_x;
-                  const int in_y =
-                      in_y_origin + dilation_height_factor * filter_y;
-                  // If the location is outside the bounds of the input image,
-                  // use zero as a default value.
-                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                      (in_y < input_height)) {
-                    int32_t input_val =
-                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
-                    int32_t filter_val = filter_data[Offset(
-                        filter_shape, 0, filter_y, filter_x, oc)];
-                    acc += (filter_val + filter_offset) *
-                           (input_val + input_offset);
-                  }
-                }
-              }
-              if (bias_data) {
-                acc += bias_data[oc];
-              }
-              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
-                                                        output_shift);
-              acc += output_offset;
-              acc = std::max(acc, output_activation_min);
-              acc = std::min(acc, output_activation_max);
-              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
-                  static_cast<uint8_t>(acc);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  // TODO(b/148596273): Reconcile reference versions, perhaps with common
-  // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
-  static inline void RunPerChannel(
-      const DepthwiseParams& params, const RuntimeShape& input_shape,
-      const int8_t* input_data, const RuntimeShape& filter_shape,
-      const int8_t* filter_data, const RuntimeShape& bias_shape,
-      const int32_t* bias_data, const RuntimeShape& output_shape,
-      int8_t* output_data) {
-    // Get parameters.
-    // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
-    const int stride_width = params.stride_width;
-    const int stride_height = params.stride_height;
-    const int dilation_width_factor = params.dilation_width_factor;
-    const int dilation_height_factor = params.dilation_height_factor;
-    const int pad_width = params.padding_values.width;
-    const int pad_height = params.padding_values.height;
-    const int depth_multiplier = params.depth_multiplier;
-    const int32_t input_offset = params.input_offset;
-    const int32_t output_offset = params.output_offset;
-    const int32_t output_activation_min = params.quantized_activation_min;
-    const int32_t output_activation_max = params.quantized_activation_max;
-    const int32_t* output_multiplier = params.output_multiplier_per_channel;
-    const int32_t* output_shift = params.output_shift_per_channel;
-
-    // Check dimensions of the tensors.
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-    const int input_height = input_shape.Dims(1);
-    const int input_width = input_shape.Dims(2);
-    const int input_depth = input_shape.Dims(3);
-    const int filter_height = filter_shape.Dims(1);
-    const int filter_width = filter_shape.Dims(2);
-    const int output_height = output_shape.Dims(1);
-    const int output_width = output_shape.Dims(2);
-    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-    for (int batch = 0; batch < batches; ++batch) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-            for (int m = 0; m < depth_multiplier; ++m) {
-              const int output_channel = m + in_channel * depth_multiplier;
-              const int in_x_origin = (out_x * stride_width) - pad_width;
-              const int in_y_origin = (out_y * stride_height) - pad_height;
-              int32_t acc = 0;
-              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                  const int in_x =
-                      in_x_origin + dilation_width_factor * filter_x;
-                  const int in_y =
-                      in_y_origin + dilation_height_factor * filter_y;
-                  // Zero padding by omitting the areas outside the image.
-                  const bool is_point_inside_image =
-                      (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                      (in_y < input_height);
-                  if (is_point_inside_image) {
-                    int32_t input_val = input_data[Offset(
-                        input_shape, batch, in_y, in_x, in_channel)];
-                    int32_t filter_val = filter_data[Offset(
-                        filter_shape, 0, filter_y, filter_x, output_channel)];
-                    // Accumulate with 32 bits accumulator.
-                    // In the nudging process during model quantization, we
-                    // force the real value of 0.0 to be represented by a
-                    // quantized value. This guarantees that the input_offset
-                    // is an int8_t, even though it is represented using
-                    // int32_t. int32_t += int8_t * (int8_t - int8_t),
-                    // so the highest value we can get from
-                    // each accumulation is [-127, 127] * ([-128, 127] -
-                    // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                    // = 14.98, which means we can accumulate at least 2^16
-                    // multiplications without overflow. The accumulator is
-                    // applied to a filter so the accumulation logic will hold
-                    // as long as the filter size (filter_y * filter_x *
-                    // in_channel) does not exceed 2^16, which is the case in
-                    // all the models we have seen so far.
-                    acc += filter_val * (input_val + input_offset);
-                  }
-                }
-              }
-              if (bias_data) {
-                acc += bias_data[output_channel];
-              }
-              acc = DepthwiseConvRound<output_rounding>(
-                  acc, output_multiplier[output_channel],
-                  output_shift[output_channel]);
-              acc += output_offset;
-              acc = std::max(acc, output_activation_min);
-              acc = std::min(acc, output_activation_max);
-              output_data[Offset(output_shape, batch, out_y, out_x,
-                                 output_channel)] = static_cast<int8_t>(acc);
-            }
-          }
-        }
-      }
-    }
-  }
-};
-
-}  // namespace depthwise_conv
-
-inline void DepthwiseConv(
-    const DepthwiseParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    uint8_t* output_data) {
-  return depthwise_conv::DepthwiseConvBasicKernel<
-      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
-                                                       input_data, filter_shape,
-                                                       filter_data, bias_shape,
-                                                       bias_data, output_shape,
-                                                       output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
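
The two output-rounding categories above differ only in how exact halves are treated; a small standalone sketch of that difference on the value -1.5 (for example an accumulator of -3 rescaled by 1/2), independent of the removed helpers:

#include <cmath>
#include <cstdio>

int main() {
  const double exact = -1.5;
  // kUpward: halves are rounded towards +infinity (add 0.5, then floor).
  const long upward = static_cast<long>(std::floor(exact + 0.5));    // -1
  // kAwayFromZero: exact halves are rounded away from zero.
  const long away_from_zero = static_cast<long>(std::round(exact));  // -2
  std::printf("upward=%ld away_from_zero=%ld\n", upward, away_from_zero);
  return 0;
}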

+ 0 - 78
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h

@@ -1,78 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
-
-#include <limits.h>
-
-#include <vector>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Dequantizes into a float without rounding.
-template <typename InputT, typename OutputT>
-inline void Dequantize(const tflite::DequantizationParams& op_params,
-                       const RuntimeShape& input_shape,
-                       const InputT* input_data,
-                       const RuntimeShape& output_shape, OutputT* output_data) {
-  int32_t zero_point = op_params.zero_point;
-  const double scale = op_params.scale;
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    const int32_t val = input_data[i];
-    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
-    output_data[i] = result;
-  }
-}
-
-// Dequantizes per-channel quantized tensor to float.
-template <typename T>
-inline void PerChannelDequantize(
-    const tflite::PerChannelDequantizationParams& op_params,
-    const RuntimeShape& input_shape, const T* input_data,
-    const RuntimeShape& output_shape, float* output_data) {
-  // Ensure flat size is same.
-  MatchingFlatSize(input_shape, output_shape);
-
-  const int32_t* zero_point = op_params.zero_point;
-  const float* scale = op_params.scale;
-  const int32_t quantized_dimension = op_params.quantized_dimension;
-  const int32_t num_dims = input_shape.DimensionsCount();
-  const int32_t* dims_data = input_shape.DimsData();
-  std::vector<int> current_dim(num_dims, 0);
-
-  do {
-    size_t offset =
-        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
-                            current_dim.data(), 0, nullptr);
-    const int channel = current_dim[quantized_dimension];
-    const int32_t val = input_data[offset];
-    const float result =
-        static_cast<float>(scale[channel] * (val - zero_point[channel]));
-    output_data[offset] = result;
-  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
-                     current_dim.data()));
-}
-
-}  // namespace reference_ops
-
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
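
The affine dequantization above is simply result = scale * (value - zero_point); a standalone numeric sketch with an assumed scale of 0.5 and zero point of -1:

#include <cstdint>
#include <cstdio>

int main() {
  const float scale = 0.5f;
  const int32_t zero_point = -1;
  const int8_t quantized[3] = {-1, 1, 5};
  for (int i = 0; i < 3; ++i) {
    const float real = scale * (quantized[i] - zero_point);
    std::printf("q=%d -> %g\n", quantized[i], real);  // 0, 1 and 3
  }
  return 0;
}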

+ 0 - 247
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h

@@ -1,247 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // The input offset is the negative of the input zero point. Activation
-  // tensors are asymmetrically quantized, so they span the full int8 range.
-  constexpr int32_t max_value =
-      static_cast<int32_t>(std::numeric_limits<T>::max());
-  TFLITE_DCHECK_GE(params.input1_offset, -max_value);
-  TFLITE_DCHECK_LE(params.input1_offset, max_value);
-  TFLITE_DCHECK_GE(params.input2_offset, -max_value);
-  TFLITE_DCHECK_LE(params.input2_offset, max_value);
-  TFLITE_DCHECK_GE(params.output_offset, -max_value);
-  TFLITE_DCHECK_LE(params.output_offset, max_value);
-}
-
-// Element-wise div that can often be used for inner loop of broadcast Div as
-// well as the non-broadcast Div.
-template <typename T>
-inline void DivElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  DivCheckArithmeticParams<T>(params);
-
-  for (int i = 0; i < size; ++i) {
-    int32_t input1_val = params.input1_offset + input1_data[i];
-    int32_t input2_val = params.input2_offset + input2_data[i];
-    TFLITE_DCHECK_NE(input2_val, 0);
-    if (input2_val < 0) {
-      // Invert signs to avoid a negative input2_val as input2_inv needs to be
-      // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
-      input1_val = -input1_val;
-      input2_val = -input2_val;
-    }
-    int recip_shift;
-    const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
-    const int headroom = CountLeadingSignBits(input1_val);
-    const int32_t unscaled_quotient =
-        MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
-                                                    headroom);
-    const int total_shift = params.output_shift - recip_shift - headroom;
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            unscaled_quotient, params.output_multiplier, total_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-inline void Div(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  DivElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void Div(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int8_t* input1_data,
-                const RuntimeShape& input2_shape, const int8_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  DivElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-template <typename T, int N = 5>
-inline void BroadcastDivSlowQuantized(
-    const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
-    const T* input1_data, const RuntimeShape& unextended_input2_shape,
-    const T* input2_data, const RuntimeShape& unextended_output_shape,
-    T* output_data) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                 &output_desc);
-
-  DivCheckArithmeticParams<T>(params);
-
-  auto div_func = [&](int indexes[N]) {
-    int32_t input1_val =
-        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    int32_t input2_val =
-        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
-    TFLITE_DCHECK_NE(input2_val, 0);
-    if (input2_val < 0) {
-      // Invert signs to avoid a negative input2_val as input2_inv needs to be
-      // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
-      input1_val = -input1_val;
-      input2_val = -input2_val;
-    }
-    int recip_shift;
-    const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
-    const int headroom = CountLeadingSignBits(input1_val);
-    const int32_t unscaled_quotient =
-        MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
-                                                    headroom);
-    const int total_shift = params.output_shift - recip_shift - headroom;
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            unscaled_quotient, params.output_multiplier, total_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        static_cast<T>(clamped_output);
-  };
-  NDOpsHelper<N>(output_desc, div_func);
-}
-
-template <int N = 5>
-inline void BroadcastDivSlow(const ArithmeticParams& params,
-                             const RuntimeShape& unextended_input1_shape,
-                             const uint8_t* input1_data,
-                             const RuntimeShape& unextended_input2_shape,
-                             const uint8_t* input2_data,
-                             const RuntimeShape& unextended_output_shape,
-                             uint8_t* output_data) {
-  BroadcastDivSlowQuantized<uint8_t, N>(
-      params, unextended_input1_shape, input1_data, unextended_input2_shape,
-      input2_data, unextended_output_shape, output_data);
-}
-
-template <int N = 5>
-inline void BroadcastDivSlow(const ArithmeticParams& params,
-                             const RuntimeShape& unextended_input1_shape,
-                             const int8_t* input1_data,
-                             const RuntimeShape& unextended_input2_shape,
-                             const int8_t* input2_data,
-                             const RuntimeShape& unextended_output_shape,
-                             int8_t* output_data) {
-  BroadcastDivSlowQuantized<int8_t, N>(
-      params, unextended_input1_shape, input1_data, unextended_input2_shape,
-      input2_data, unextended_output_shape, output_data);
-}
-
-// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
-// dimensionality if the runtime code does a single loop over one dimension
-// that handles broadcasting as the base case. The code generator would then
-// generate max(D1, D2) nested for loops.
-template <typename T, int N = 5>
-void BroadcastDivSlow(const ArithmeticParams& params,
-                      const RuntimeShape& unextended_input1_shape,
-                      const T* input1_data,
-                      const RuntimeShape& unextended_input2_shape,
-                      const T* input2_data,
-                      const RuntimeShape& unextended_output_shape,
-                      T* output_data) {
-  T output_activation_min;
-  T output_activation_max;
-  GetActivationParams(params, &output_activation_min, &output_activation_max);
-
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                 &output_desc);
-
-  // In TensorFlow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest
-  // stride, typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has the smallest stride.
-
-  auto div_func = [&](int indexes[N]) {
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        ActivationFunctionWithMinMax(
-            input1_data[SubscriptToIndex(desc1, indexes)] /
-                input2_data[SubscriptToIndex(desc2, indexes)],
-            output_activation_min, output_activation_max);
-  };
-  NDOpsHelper<N>(output_desc, div_func);
-}
-
-template <typename T>
-inline void Div(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  T output_activation_min;
-  T output_activation_max;
-  GetActivationParams(params, &output_activation_min, &output_activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] / input2_data[i], output_activation_min,
-        output_activation_max);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
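
For the float path, Div reduces to an elementwise divide followed by the activation clamp; a minimal sketch with invented inputs and an assumed [0, 6] activation range:

#include <algorithm>
#include <cstdio>

int main() {
  const float in1[3] = {6.f, -4.f, 9.f};
  const float in2[3] = {2.f, 8.f, 3.f};
  const float act_min = 0.f, act_max = 6.f;
  float out[3];
  for (int i = 0; i < 3; ++i)
    out[i] = std::min(act_max, std::max(act_min, in1[i] / in2[i]));
  std::printf("%g %g %g\n", out[0], out[1], out[2]);  // 3 0 3
  return 0;
}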

+ 0 - 37
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h

@@ -1,37 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
-
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Elu(const RuntimeShape& input_shape, const float* input_data,
-                const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const float val = input_data[i];
-    output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
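
The ELU above is the identity for non-negative inputs and exp(x) - 1 (via expm1 for accuracy near zero) for negative ones; a short standalone check:

#include <cmath>
#include <cstdio>

int main() {
  const float xs[3] = {-1.f, 0.f, 2.f};
  for (float x : xs) {
    const float y = x < 0.f ? std::expm1(x) : x;
    std::printf("elu(%g) = %g\n", x, y);  // about -0.632, then 0, then 2
  }
  return 0;
}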

+ 0 - 38
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h

@@ -1,38 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
-
-#include <cmath>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-inline void Exp(const T* input_data, const size_t num_elements,
-                T* output_data) {
-  ruy::profiler::ScopeLabel label("Exp");
-  for (size_t idx = 0; idx < num_elements; ++idx) {
-    output_data[idx] = std::exp(input_data[idx]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_

+ 0 - 38
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h

@@ -1,38 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-void Fill(const RuntimeShape& value_shape, const T* value_data,
-          const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_EQ(value_shape.DimensionsCount(), 0);
-  const int flat_size = output_shape.FlatSize();
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = *value_data;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_

+ 0 - 39
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h

@@ -1,39 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Floor(const RuntimeShape& input_shape, const float* input_data,
-                  const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    int offset = i;
-    output_data[offset] = std::floor(input_data[offset]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_

+ 0 - 35
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h

@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
-
-#include <cmath>
-#include <functional>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T>
-T FloorDiv(T input1, T input2) {
-  return std::floor(std::divides<double>()(static_cast<double>(input1),
-                                           static_cast<double>(input2)));
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
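
FloorDiv differs from C++'s built-in integer division, which truncates towards zero, whenever the operands have mixed signs; a quick standalone comparison:

#include <cmath>
#include <cstdio>

int main() {
  const int a = -7, b = 2;
  const int trunc_div = a / b;  // -3: C++ division truncates towards zero
  const int floor_div = static_cast<int>(
      std::floor(static_cast<double>(a) / static_cast<double>(b)));  // -4
  std::printf("trunc=%d floor=%d\n", trunc_div, floor_div);
  return 0;
}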

+ 0 - 44
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h

@@ -1,44 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
-
-#include <cmath>
-#include <functional>
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-T FloorMod(T input1, T input2) {
-  struct FloatMod {
-    float operator()(const float lhs, const float rhs) const {
-      return std::fmod(lhs, rhs);
-    }
-  };
-  using ModFunc = typename std::conditional<std::is_integral<T>::value,
-                                            std::modulus<T>, FloatMod>::type;
-  ModFunc mod_func;
-  T trunc_mod = mod_func(input1, input2);
-  return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0))
-             ? (trunc_mod + input2)
-             : trunc_mod;
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
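
The sign correction above converts C++'s truncated modulo into a floored modulo, so the result takes the sign of the divisor; a standalone check with -7 mod 2:

#include <cstdio>

int main() {
  const int input1 = -7, input2 = 2;
  const int trunc_mod = input1 % input2;  // -1 (truncated towards zero)
  const int floor_mod =
      (trunc_mod != 0 && (input2 < 0) != (trunc_mod < 0)) ? trunc_mod + input2
                                                          : trunc_mod;
  std::printf("trunc_mod=%d floor_mod=%d\n", trunc_mod, floor_mod);  // -1, 1
  return 0;
}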

+ 0 - 323
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h

@@ -1,323 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
-
-#include <algorithm>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& weights_shape,
-    const float* weights_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dims_count = output_shape.DimensionsCount();
-  const int weights_dims_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
-                                       output_shape, output_dims_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      float total = 0.f;
-      for (int d = 0; d < accum_depth; ++d) {
-        total += input_data[b * accum_depth + d] *
-                 weights_data[out_c * accum_depth + d];
-      }
-      float bias_value = 0.0f;
-      if (bias_data) {
-        bias_value = bias_data[out_c];
-      }
-      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
-          total + bias_value, output_activation_min, output_activation_max);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    uint8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(output_offset, 0);
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum = bias_data[out_c];
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; ++d) {
-        int16_t input_val = input_data[b * accum_depth + d] + input_offset;
-        int16_t filter_val =
-            filter_data[out_c * accum_depth + d] + filter_offset;
-        accum += filter_val * input_val;
-      }
-      // Down-scale the final int32_t accumulator to the scale used by our
-      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-      // multiplier and shift here have been pre-computed offline
-      // (e.g. by toco).
-      accum =
-          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
-      // Saturate, cast to int16_t, and store to output array.
-      accum = std::max(accum, output_activation_min - output_offset);
-      accum = std::min(accum, output_activation_max - output_offset);
-      accum += output_offset;
-      output_data[out_c + output_depth * b] = accum;
-    }
-  }
-}
-
-inline void ShuffledFullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& weights_shape,
-    const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-  // TODO(b/62193649): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
-  TFLITE_DCHECK((accum_depth % 16) == 0);
-  TFLITE_DCHECK((output_depth % 4) == 0);
-
-  // Shuffling and xoring of input activations into the workspace buffer
-  uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
-  if (batches == 1) {
-    for (int i = 0; i < accum_depth; i++) {
-      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
-    }
-  } else if (batches == 4) {
-    for (int c = 0; c < accum_depth; c += 16) {
-      for (int b = 0; b < 4; b++) {
-        const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
-        for (int j = 0; j < 16; j++) {
-          uint8_t src_val = *src_data_ptr++;
-          // Flip the sign bit, so that the kernel will only need to
-          // reinterpret these uint8_t values as int8_t, getting for free the
-          // subtraction of the zero_point value 128.
-          uint8_t dst_val = src_val ^ 0x80;
-          *shuffled_input_workspace_ptr++ = dst_val;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-
-  // Actual computation
-  if (batches == 1) {
-    int16_t* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8_t values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8_t* shuffled_weights_ptr =
-        reinterpret_cast<const int8_t*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8_t* shuffled_input_data =
-        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      // Internal accumulation.
-      // Accumulators start at zero; the bias is added after the loop.
-      int32_t accum[4] = {0};
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int j = 0; j < 16; j++) {
-            int8_t input_val = shuffled_input_data[d + j];
-            int8_t weights_val = *shuffled_weights_ptr++;
-            accum[i] += weights_val * input_val;
-          }
-        }
-      }
-      for (int i = 0; i < 4; i++) {
-        // Add bias value
-        int32_t acc = accum[i] + bias_data[c + i];
-        // Down-scale the final int32_t accumulator to the scale used by our
-        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-        // multiplier and shift here have been pre-computed offline
-        // (e.g. by toco).
-        acc =
-            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-        // Saturate, cast to int16_t, and store to output array.
-        acc = std::max(acc, output_activation_min);
-        acc = std::min(acc, output_activation_max);
-        output_ptr[c + i] = acc;
-      }
-    }
-  } else if (batches == 4) {
-    int16_t* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8_t values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8_t* shuffled_weights_ptr =
-        reinterpret_cast<const int8_t*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8_t* shuffled_input_data =
-        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      const int8_t* shuffled_input_ptr = shuffled_input_data;
-      // Internal accumulation.
-      // Initialize the accumulators to zero; the bias is added after the
-      // accumulation loop.
-      int32_t accum[4][4];
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          accum[i][b] = 0;
-        }
-      }
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int b = 0; b < 4; b++) {
-            for (int j = 0; j < 16; j++) {
-              int8_t input_val = shuffled_input_ptr[16 * b + j];
-              int8_t weights_val = shuffled_weights_ptr[16 * i + j];
-              accum[i][b] += weights_val * input_val;
-            }
-          }
-        }
-        shuffled_input_ptr += 64;
-        shuffled_weights_ptr += 64;
-      }
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          // Add bias value
-          int32_t acc = accum[i][b] + bias_data[c + i];
-          // Down-scale the final int32_t accumulator to the scale used by our
-          // (16-bit, typically 3 integer bits) fixed-point format. The
-          // quantized multiplier and shift here have been pre-computed offline
-          // (e.g. by toco).
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          // Saturate, cast to int16_t, and store to output array.
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_ptr[b * output_depth + c + i] = acc;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
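The kernel above repeatedly down-scales an int32_t accumulator with MultiplyByQuantizedMultiplier. As a minimal standalone sketch of that step, assuming the usual gemmlowp-style rounding-doubling definition (the helper names below are illustrative, not the library's own):

#include <cstdint>
#include <cstdio>
#include <limits>

// Saturating rounding-doubling high multiply: round((a * b) / 2^31), saturated.
inline int32_t SatRoundingDoublingHighMul(int32_t a, int32_t b) {
  const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
  const int64_t ab = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  const int32_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
  const int32_t result = static_cast<int32_t>((ab + nudge) / (1ll << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : result;
}

// Rounding arithmetic shift right by a non-negative exponent.
inline int32_t RoundingDivideByPOT(int32_t x, int exponent) {
  const int32_t mask = static_cast<int32_t>((1ll << exponent) - 1);
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
  return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

// Scales an int32 accumulator by multiplier * 2^shift (shift may be negative).
inline int32_t MultiplyByQuantizedMultiplierSketch(int32_t acc, int32_t multiplier,
                                                   int shift) {
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  return RoundingDivideByPOT(
      SatRoundingDoublingHighMul(acc * (1 << left_shift), multiplier), right_shift);
}

int main() {
  // Example values only: rescale acc = 20000 by roughly 0.6 * 2^-1.
  std::printf("%d\n", MultiplyByQuantizedMultiplierSketch(20000, 1288490189, -1));
  return 0;
}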

+ 0 - 145
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h

@@ -1,145 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void CheckArithmeticParams(const ArithmeticParams& params) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // The input offset is the negated input zero point. Activation tensors are
-  // asymmetrically quantized, so they span the full int8 range.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-  TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
-}
-
-inline void ElementWise(
-    int size, const ArithmeticParams& params, const int8_t* input1_data,
-    const int8_t* input2_data, int8_t* output_data,
-    void (*check_arithmetic_params)(const ArithmeticParams&),
-    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
-  CheckArithmeticParams(params);
-  for (int i = 0; i < size; ++i) {
-    output_data[i] = binary_func(input1_data[i], input2_data[i], params);
-  }
-}
-
-inline void BroadcastBinaryFunction4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const int8_t* input1_data, const RuntimeShape& input2_shape,
-    const int8_t* input2_data, const RuntimeShape& output_shape,
-    int8_t* output_data,
-    void (*check_arithmetic_params)(const ArithmeticParams&),
-    int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func(
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)],
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)], params);
-        }
-      }
-    }
-  }
-}
-
-inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
-  const int32_t input1_val = params.input1_offset + x;
-  const int32_t input2_val = params.input2_offset + y;
-  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-  const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-  const int32_t scaled_input1_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input1_val, params.input1_multiplier, params.input1_shift);
-  const int32_t scaled_input2_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input2_val, params.input2_multiplier, params.input2_shift);
-  const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-  const int32_t raw_output =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          raw_sum, params.output_multiplier, params.output_shift) +
-      params.output_offset;
-  const int32_t clamped_output =
-      std::min(params.quantized_activation_max,
-               std::max(params.quantized_activation_min, raw_output));
-  return static_cast<int8_t>(clamped_output);
-}
-
-// Element-wise add that can often be used for the inner loop of a broadcast
-// add as well as for the non-broadcast add.
-inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const int8_t* input1_data, const int8_t* input2_data,
-                           int8_t* output_data) {
-  ElementWise(size, params, input1_data, input2_data, output_data,
-              CheckArithmeticParams, AddFunc);
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int8_t* input1_data,
-                const RuntimeShape& input2_shape, const int8_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  CheckArithmeticParams(params);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const int8_t* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const int8_t* input2_data,
-                               const RuntimeShape& output_shape,
-                               int8_t* output_data) {
-  BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape,
-                                input2_data, output_shape, output_data,
-                                CheckArithmeticParams, AddFunc);
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
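Seen in the float domain, AddFunc dequantizes both operands with their own scale and zero point, adds them, and requantizes into the output scale; the integer kernel approximates the division by the output scale with the pre-computed multiplier/shift pairs. A small float-domain reference sketch, with scales and zero points that are purely illustrative:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Float-domain reference for the int8 quantized add:
// dequantize both operands, add, then requantize into the output scale.
int8_t AddQuantizedReference(int8_t q1, float s1, int z1,
                             int8_t q2, float s2, int z2,
                             float s_out, int z_out) {
  const float sum = s1 * (q1 - z1) + s2 * (q2 - z2);
  const int q = static_cast<int>(std::round(sum / s_out)) + z_out;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

int main() {
  // Hypothetical quantization parameters chosen only for illustration.
  const int8_t out = AddQuantizedReference(40, 0.05f, 0, -20, 0.10f, 5, 0.08f, -3);
  std::printf("quantized sum: %d\n", out);
  return 0;
}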

+ 0 - 238
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h

@@ -1,238 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// Fixed-point per-channel-quantization convolution reference kernel.
-inline void ConvPerChannel(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  // Get parameters.
-  const int32_t input_offset = params.input_offset;  // r = s(q - Z)
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int32_t output_offset = params.output_offset;
-
-  // Set min and max value of the output.
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Consistency check.
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-
-  // Check dimensions of the tensors.
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                int32_t input_val =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                int32_t filter_val = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                // Accumulate with 32 bits accumulator.
-                // In the nudging process during model quantization, we force
-                // the real value 0.0 to be representable by a quantized value.
-                // This guarantees that input_offset fits in an int8_t, even
-                // though it is represented using int32_t. int32_t += int8_t *
-                // (int8_t - int8_t) so the highest value we can get from each
-                // accumulation is [-127, 127] * ([-128, 127] -
-                // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                // = 14.98, which means we can accumulate at least 2^16
-                // multiplications without overflow. The accumulator is
-                // applied to a filter so the accumulation logic will hold as
-                // long as the filter size (filter_y * filter_x * in_channel)
-                // does not exceed 2^16, which is the case in all the models
-                // we have seen so far.
-                // TODO(b/174275578): Add a check to make sure the
-                // accumulator depth is smaller than 2^16.
-                acc += filter_val * (input_val + input_offset);
-              }
-            }
-          }
-
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-// Fixed-point per-channel-quantization convolution reference kernel.
-// 16-bit data and 8-bit filter
-template <typename AccumScalar>
-inline void ConvPerChannel(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  // Get parameters.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-
-  // Set min and max value of the output.
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Consistency check.
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = input_shape.Dims(3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-
-  // Check dimensions of the tensors.
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int filter_input_depth = filter_shape.Dims(3);
-  const int groups = input_depth / filter_input_depth;
-  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
-  const int filters_per_group = output_depth / groups;
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      const int in_y_origin = (out_y * stride_height) - pad_height;
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        const int in_x_origin = (out_x * stride_width) - pad_width;
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          auto group = out_channel / filters_per_group;
-          AccumScalar acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            const int in_y = in_y_origin + dilation_height_factor * filter_y;
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              const int in_x = in_x_origin + dilation_width_factor * filter_x;
-
-              // Zero padding by omitting the areas outside the image.
-              const bool is_point_inside_image =
-                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                  (in_y < input_height);
-
-              if (!is_point_inside_image) {
-                continue;
-              }
-
-              for (int in_channel = 0; in_channel < filter_input_depth;
-                   ++in_channel) {
-                int32_t input_val =
-                    input_data[Offset(input_shape, batch, in_y, in_x,
-                                      in_channel + group * filter_input_depth)];
-                int32_t filter_val = filter_data[Offset(
-                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
-                // Accumulate with a 64-bit accumulator.
-                // int64_t += int8_t * int16_t, so the largest magnitude a
-                // single accumulation can add is 127 * 32768 = 4161536,
-                // log2(4161536) = 21.99, leaving ample headroom.
-                acc += filter_val * input_val;
-              }
-            }
-          }
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          scaled_acc = std::max(scaled_acc, output_activation_min);
-          scaled_acc = std::min(scaled_acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int16_t>(scaled_acc);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
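The per-channel output_multiplier/output_shift arrays consumed above are derived offline from input_scale * filter_scale[channel] / output_scale. A sketch of that decomposition into a Q0.31 multiplier and a power-of-two shift, assuming the usual frexp-based approach; edge-case handling for extremely small scales is omitted here:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose a positive real scale into a Q0.31 fixed-point multiplier and a
// power-of-two shift, the form consumed per output channel by ConvPerChannel.
void QuantizeMultiplierSketch(double real_multiplier, int32_t* quantized_multiplier,
                              int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // real = q * 2^shift, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) {  // mantissa rounded up to 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main() {
  // Hypothetical per-channel scales: input_scale * filter_scale / output_scale.
  const double real = 0.5 * 0.02 / 0.1;
  int32_t mult = 0;
  int shift = 0;
  QuantizeMultiplierSketch(real, &mult, &shift);
  std::printf("multiplier=%d shift=%d\n", mult, shift);
  return 0;
}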

+ 0 - 291
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h

@@ -1,291 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-inline void DepthwiseConvPerChannel(
-    const DepthwiseParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  // Get parameters.
-  // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            int32_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  // Accumulate with 32 bits accumulator.
-                  // In the nudging process during model quantization, we force
-                  // the real value 0.0 to be representable by a quantized value.
-                  // This guarantees that input_offset fits in an int8_t, even
-                  // though it is represented using int32_t. int32_t += int8_t *
-                  // (int8_t - int8_t) so the highest value we can get from each
-                  // accumulation is [-127, 127] * ([-128, 127] -
-                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                  // = 14.98, which means we can accumulate at least 2^16
-                  // multiplications without overflow. The accumulator is
-                  // applied to a filter so the accumulation logic will hold as
-                  // long as the filter size (filter_y * filter_x * in_channel)
-                  // does not exceed 2^16, which is the case in all the models
-                  // we have seen so far.
-                  // TODO(b/174275578): Add a check to make sure the
-                  // accumulator depth is smaller than 2^16.
-                  acc += filter_val * (input_val + input_offset);
-                }
-              }
-            }
-            if (bias_data) {
-              acc += bias_data[output_channel];
-            }
-            acc = MultiplyByQuantizedMultiplier(
-                acc, output_multiplier[output_channel],
-                output_shift[output_channel]);
-            acc += output_offset;
-            acc = std::max(acc, output_activation_min);
-            acc = std::min(acc, output_activation_max);
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] = static_cast<int8_t>(acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void DepthwiseConvPerChannel(
-    const DepthwiseParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const std::int64_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  // Get parameters.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            std::int64_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  // Accumulate with a 64-bit accumulator.
-                  // We assume at most 2^16 accumulations, as in the 8-bit case,
-                  // so the value in the accumulator should not exceed 40 bits.
-                  acc += static_cast<int64_t>(filter_val) *
-                         static_cast<int64_t>(input_val);
-                }
-              }
-            }
-            if (bias_data) {
-              acc += bias_data[output_channel];
-            }
-            int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-                acc, output_multiplier[output_channel],
-                output_shift[output_channel]);
-            scaled_acc = std::max(scaled_acc, output_activation_min);
-            scaled_acc = std::min(scaled_acc, output_activation_max);
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] =
-                static_cast<int16_t>(scaled_acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void DepthwiseConvHybridPerChannel(
-    const DepthwiseParams& params, float* scaling_factors_ptr,
-    const RuntimeShape& input_shape, const int8_t* input_data,
-    const RuntimeShape& filter_shape, const int8_t* filter_data,
-    const RuntimeShape& bias_shape, const float* bias_data,
-    const RuntimeShape& output_shape, float* output_data,
-    const float* per_channel_scale, int32_t* input_offset) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int bias_depth = bias_shape.FlatSize();
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_depth, output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            int32_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  acc += filter_val * (input_val - input_offset[batch]);
-                }
-              }
-            }
-            float acc_float = static_cast<float>(acc);
-            acc_float *=
-                per_channel_scale[output_channel] * scaling_factors_ptr[batch];
-            if (bias_data && output_channel < bias_depth) {
-              acc_float += bias_data[output_channel];
-            }
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] =
-                ActivationFunctionWithMinMax(acc_float, output_activation_min,
-                                             output_activation_max);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
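A float reference for the accumulation pattern used by the depthwise kernels above: every input channel is convolved with depth_multiplier filters, and the result lands in output channel in_channel * depth_multiplier + m. Stride 1 and no padding are assumed to keep the sketch short; the layout mirrors the NHWC indexing above, and all shapes in main are made up for illustration:

#include <cstdio>
#include <vector>

// Single-batch NHWC depthwise convolution, float, stride 1, no padding.
// Filter layout is [f_h, f_w, out_d], matching Offset(filter_shape, 0, fy, fx, oc).
void DepthwiseConvFloat(const std::vector<float>& input, int in_h, int in_w, int in_d,
                        const std::vector<float>& filter, int f_h, int f_w,
                        int depth_multiplier, std::vector<float>& output) {
  const int out_h = in_h - f_h + 1;
  const int out_w = in_w - f_w + 1;
  const int out_d = in_d * depth_multiplier;
  output.assign(static_cast<size_t>(out_h) * out_w * out_d, 0.0f);
  for (int y = 0; y < out_h; ++y)
    for (int x = 0; x < out_w; ++x)
      for (int c = 0; c < in_d; ++c)
        for (int m = 0; m < depth_multiplier; ++m) {
          const int oc = c * depth_multiplier + m;
          float acc = 0.0f;
          for (int fy = 0; fy < f_h; ++fy)
            for (int fx = 0; fx < f_w; ++fx) {
              const float in_val = input[((y + fy) * in_w + (x + fx)) * in_d + c];
              const float w = filter[(fy * f_w + fx) * out_d + oc];
              acc += in_val * w;
            }
          output[(y * out_w + x) * out_d + oc] = acc;
        }
}

int main() {
  std::vector<float> in(3 * 3 * 2, 1.0f);    // 3x3 input, 2 channels
  std::vector<float> filt(2 * 2 * 4, 0.5f);  // 2x2 filters, depth_multiplier = 2
  std::vector<float> out;
  DepthwiseConvFloat(in, 3, 3, 2, filt, 2, 2, 2, out);
  std::printf("out[0] = %f\n", out[0]);      // 4 taps * 1.0 * 0.5 = 2.0
  return 0;
}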

+ 0 - 201
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h

@@ -1,201 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// For the per-channel functions, the quantization spec
-// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric)
-// defines weights as symmetric, so zero_point (params.weights_offset) is
-// always 0.
-// However, for the per-tensor functions, params.weights_offset is still
-// applied for backward compatibility.
-
-inline void FullyConnectedPerChannel(
-    const FullyConnectedParams& params, const int32_t* output_multiplier,
-    const int* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = output_shape.Dims(0);
-  const int output_depth = output_shape.Dims(1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += filter_val * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
-                                          output_shift[out_c]);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-template <typename AccumScalar>
-inline void FullyConnectedPerChannel(
-    const FullyConnectedParams& params, const int32_t* output_multiplier,
-    const int* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      AccumScalar acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += filter_val * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      int32_t acc_scaled = MultiplyByQuantizedMultiplier(
-          acc, output_multiplier[out_c], output_shift[out_c]);
-      acc_scaled = std::max(acc_scaled, output_activation_min);
-      acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-template <typename AccumScalar>
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const AccumScalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = output_shape.Dims(output_dim_count - 1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      AccumScalar acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      int32_t acc_scaled =
-          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc_scaled = std::max(acc_scaled, output_activation_min);
-      acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
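A stripped-down view of the per-tensor int8 inner loop above, for a single output unit: accumulate (filter + filter_offset) * (input + input_offset) in int32, add the bias, then clamp. Purely for readability, the multiplier/shift requantization is replaced here by a plain float effective scale, and every offset, bias, and scale value in main is hypothetical:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One output unit of an int8 fully-connected layer, with a float rescale
// standing in for the fixed-point requantization step.
int8_t FullyConnectedOneOutput(const int8_t* input, const int8_t* filter,
                               int accum_depth, int32_t input_offset,
                               int32_t filter_offset, int32_t bias,
                               float effective_scale, int32_t output_offset) {
  int32_t acc = 0;
  for (int d = 0; d < accum_depth; ++d) {
    acc += (filter[d] + filter_offset) * (input[d] + input_offset);
  }
  acc += bias;
  const int32_t out = static_cast<int32_t>(acc * effective_scale) + output_offset;
  return static_cast<int8_t>(std::min(127, std::max(-128, out)));
}

int main() {
  const int8_t in[4] = {10, -5, 3, 0};
  const int8_t w[4] = {2, 4, -1, 7};
  // Offsets, bias, and scale below are illustrative only.
  std::printf("%d\n", FullyConnectedOneOutput(in, w, 4, 5, 0, 16, 0.01f, -1));
  return 0;
}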

+ 0 - 67
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h

@@ -1,67 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
-                            int32_t depth, const int8_t* input_data,
-                            int8_t* output_data) {
-  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  // The output scale must be in sync with Prepare().
-  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
-  // to [-1, 127/128].
-  static constexpr int32_t kOutputScale = 7;
-  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
-    // int32_t = (int8_t - int8_t) ^ 2.
-    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
-    // safe from overflowing in at least 2^16 steps.
-    int32_t acc = 0;
-    for (int inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input =
-          input_data[depth * outer_index + inner_index] - input_zero_point;
-      acc += input * input;
-    }
-    int32_t inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
-                                     &inv_l2norm_shift);
-
-    for (int inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input =
-          input_data[depth * outer_index + inner_index] - input_zero_point;
-
-      // Rescale and downcast. Rescale is folded into the division.
-      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
-          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
-      output_in_q24 =
-          std::min(static_cast<int32_t>(kMaxInt8),
-                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
-      output_data[depth * outer_index + inner_index] =
-          static_cast<int8_t>(output_in_q24);
-    }
-  }
-}
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
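The float computation that the removed int8 L2Normalization approximates: each inner vector is divided by its L2 norm, with the quantized kernel replacing 1/sqrt(acc) by GetInvSqrtQuantizedMultiplierExp and emitting outputs in 1/128 steps. A float reference sketch; the small epsilon guard is an addition here, not part of the kernel above:

#include <cmath>
#include <cstdio>

// Divide each inner vector of length `depth` by its L2 norm.
void L2NormalizeFloat(const float* input, int outer_size, int depth, float* output) {
  for (int i = 0; i < outer_size; ++i) {
    float sum_sq = 0.0f;
    for (int d = 0; d < depth; ++d) {
      sum_sq += input[i * depth + d] * input[i * depth + d];
    }
    const float inv_norm = 1.0f / std::sqrt(sum_sq + 1e-6f);  // epsilon for safety
    for (int d = 0; d < depth; ++d) {
      output[i * depth + d] = input[i * depth + d] * inv_norm;
    }
  }
}

int main() {
  const float in[4] = {3.0f, 4.0f, 0.0f, 0.0f};
  float out[4];
  L2NormalizeFloat(in, 1, 4, out);
  std::printf("%f %f\n", out[0], out[1]);  // approximately 0.6 and 0.8
  return 0;
}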

+ 0 - 121
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h

@@ -1,121 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
-                     int32_t input_multiplier, int32_t input_left_shift,
-                     int32_t input_size, const int8_t* input_data,
-                     int8_t* output_data) {
-  // Integer bits must be in sync with Prepare() function.
-  static constexpr int32_t kInputIntegerBits = 4;
-  static constexpr int32_t kOutputIntegerBits = 8;
-  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  static constexpr int32_t kOutputZeroPoint = -128;
-
-  for (int i = 0; i < input_size; ++i) {
-    const int32_t input =
-        static_cast<int32_t>(input_data[i]) - input_zero_point;
-    if (input <= -input_range_radius) {
-      output_data[i] = kMinInt8;
-    } else if (input >= input_range_radius) {
-      output_data[i] = kMaxInt8;
-    } else {
-      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
-          input, input_multiplier, input_left_shift);
-      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-      const int32_t output_in_q0 =
-          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
-
-      // Rescale and downcast.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_in_q23 =
-          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
-      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
-                                        static_cast<int32_t>(kMinInt8)),
-                               static_cast<int32_t>(kMaxInt8));
-      output_data[i] = static_cast<int8_t>(output_in_q23);
-    }
-  }
-}
-
-inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
-                     int32_t input_size, const int16_t* ptr_input_data,
-                     int16_t* ptr_output_data) {
-  // We use the sigmoid LUT and take into account that
-  // tanh(x) = 2*sigmoid(2*x) - 1.
-
-  // We scale by 3/4 to expand the range [-8, 8] -> [-10.7, 10.7].
-  // For a general parameter scale, the multiplier 3 is taken into account in
-  // the TanhPrepare function and is already included in input_multiplier.
-
-  TFLITE_DCHECK_GE(input_left_shift, 0);
-  if (input_multiplier == 0) {  // power of two case
-    input_multiplier = 3 << input_left_shift;
-    input_left_shift = 0;
-  }
-
-  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
-
-  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data =
-        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
-
-    // We do interpolation on unsigned values.
-    uint32_t abs_input_data = abs(input_data);
-
-    // We divide by 2^9 because we need to divide by 2^7 for the input
-    // conversion, plus another factor of 1/4 from the scale above.
-
-    // Define uh as a uint32_t so that this computation does not overflow.
-    uint32_t uh = abs_input_data >> 9;
-    uint32_t result;
-
-    if (uh >= 255) {
-      // Saturate to maximum.
-      result = 0x7FFF << 10;
-    } else {
-      uint32_t ua = sigmoid_table_uint16[uh];
-      uint32_t ub = sigmoid_table_uint16[uh + 1];
-      uint32_t ut = abs_input_data & 0x1ff;
-      // Interpolation is done using the fractional bit.
-      result = (ua << 9) + ut * (ub - ua);
-    }
-
-    result = (input_data >= 0) ? (result + (1 << 9))
-                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);
-
-    // Back to 16-bit.
-    result >>= 10;
-
-    *ptr_output_data = result;
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
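An illustration of the LUT-with-interpolation scheme used by the 16-bit Logistic above: a table of sigmoid samples indexed by the high bits of the input, with the low 9 bits used as the interpolation fraction. The table here is computed at startup rather than taken from the library's sigmoid_table_uint16, and only the lookup plus interpolation is shown; the sign handling and the final >> 10 rescale from the kernel above are left out, so treat the exact scaling as an assumption:

#include <cmath>
#include <cstdint>
#include <cstdio>

uint16_t table[257];

// Fill the table with sigmoid samples over an assumed input range [0, 10.7].
void BuildTable() {
  for (int i = 0; i < 257; ++i) {
    const double x = 10.7 * i / 256.0;
    table[i] = static_cast<uint16_t>(std::round(65535.0 / (1.0 + std::exp(-x))));
  }
}

// Look up the high bits, interpolate with the 9-bit fraction; the result is
// the table value scaled by 2^9.
uint32_t InterpolatedSigmoid(uint32_t abs_input) {
  const uint32_t uh = abs_input >> 9;     // table index
  const uint32_t ut = abs_input & 0x1ff;  // 9-bit fraction
  if (uh >= 256) return static_cast<uint32_t>(table[256]) << 9;
  const uint32_t ua = table[uh];
  const uint32_t ub = table[uh + 1];
  return (ua << 9) + ut * (ub - ua);
}

int main() {
  BuildTable();
  std::printf("%u\n", InterpolatedSigmoid(1000));
  return 0;
}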

+ 0 - 79
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h

@@ -1,79 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-template <typename integer_type>
-inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
-                 int32_t shift, const RuntimeShape& unextended_input_shape,
-                 const integer_type* input_data, int32_t input_zero_point,
-                 const RuntimeShape& unextended_output_shape,
-                 integer_type* output_data, int32_t output_zero_point) {
-  // The current implementation only supports rank-4 inputs and simultaneous
-  // reduction over width and height.
-  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-  const int output_batch = output_shape.Dims(0);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int output_depth = output_shape.Dims(3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int num_elements_in_axis = input_width * input_height;
-
-  TFLITE_CHECK_EQ(op_params.axis_count, 2);
-  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-  TFLITE_CHECK_EQ(output_height, 1);
-  TFLITE_CHECK_EQ(output_width, 1);
-
-  static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
-  static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();
-
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_d = 0; out_d < output_depth; ++out_d) {
-      int32_t acc = 0;
-      for (int in_h = 0; in_h < input_height; ++in_h) {
-        for (int in_w = 0; in_w < input_width; ++in_w) {
-          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
-                 input_zero_point;
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
-                    : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
-      acc += output_zero_point;
-      acc = std::min(std::max(acc, kMinInt), kMaxInt);
-      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-          static_cast<integer_type>(acc);
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
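
Note: the removed Mean kernel rounds its integer division to the nearest value by biasing the accumulator with half the divisor before dividing; the bias takes the sign of the accumulator so that negative averages round the same way as positive ones. A small self-contained illustration of just that rounding rule:

    #include <cstdio>

    // Round-to-nearest integer division as used by the removed Mean kernel.
    int RoundedDiv(int acc, int n) {
      return acc > 0 ? (acc + n / 2) / n : (acc - n / 2) / n;
    }

    int main() {
      std::printf("%d %d\n", RoundedDiv(7, 4), RoundedDiv(-7, 4));  // prints 2 -2
      return 0;
    }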

+ 0 - 133
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h

@@ -1,133 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
-
-#include <algorithm>
-
-#include "fixedpoint/fixedpoint.h"
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-template <typename T>
-inline void MulElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                      params.output_multiplier,
-                                      params.output_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-template <typename T>
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  ruy::profiler::ScopeLabel label("Mul/8bit");
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-// Mul with 16 bit inputs and int8_t outputs.
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int16_t* input1_data,
-                const RuntimeShape& input2_shape, const int16_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  ruy::profiler::ScopeLabel label("Mul/Int16Int8");
-  int32_t output_offset = params.output_offset;
-  int32_t output_activation_min = params.quantized_activation_min;
-  int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-
-    F0 unclamped_result =
-        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
-    int16_t rescaled_result =
-        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
-    int16_t clamped_result = std::min<int16_t>(
-        output_activation_max - output_offset, rescaled_result);
-    clamped_result = std::max<int16_t>(output_activation_min - output_offset,
-                                       clamped_result);
-    output_data[i] = output_offset + clamped_result;
-  }
-}
-
-template <typename T>
-inline void BroadcastMul4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  // The input shapes are extended as part of NdArrayDesc initialization.
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t unclamped_result =
-              params.output_offset +
-              MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                            params.output_multiplier,
-                                            params.output_shift);
-          const int32_t clamped_output = std::min(
-              params.quantized_activation_max,
-              std::max(params.quantized_activation_min, unclamped_result));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
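
Note: the removed MulElementwise follows the usual quantized-multiply shape visible in the code above: add the input offsets, form the raw 32-bit product, rescale it to the output scale, add the output offset, then clamp to the activation range. The sketch below uses hypothetical offsets and replaces MultiplyByQuantizedMultiplier with a plain right shift, purely to show the order of operations.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical quantization parameters, for illustration only.
      const int32_t input1_offset = 10, input2_offset = -5, output_offset = 3;
      const int32_t act_min = -128, act_max = 127;
      const int8_t q1 = 40, q2 = 20;
      const int32_t raw = (q1 + input1_offset) * (q2 + input2_offset);  // 50 * 15
      const int32_t rescaled = raw >> 4;  // stand-in for the real requantization
      const int32_t out = std::min(act_max,
                                   std::max(act_min, output_offset + rescaled));
      std::printf("%d\n", out);  // prints 49
      return 0;
    }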

+ 0 - 264
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h

@@ -1,264 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const int8_t* input_data,
-                        const RuntimeShape& output_shape, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          // Round to the closest integer value.
-          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
-                        : (acc - filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const int8_t* input_data, const RuntimeShape& output_shape,
-                    int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min,
-                   std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(params.quantized_activation_max,
-                   std::numeric_limits<int8_t>::max());
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int8_t max = std::numeric_limits<int8_t>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<int8_t>(max, params.quantized_activation_min);
-          max = std::min<int8_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int8_t>(max);
-        }
-      }
-    }
-  }
-}
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const int16_t* input_data,
-                        const RuntimeShape& output_shape,
-                        int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          // Round to the closest integer value.
-          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
-                        : (acc - filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int16_t>(acc);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const int16_t* input_data, const RuntimeShape& output_shape,
-                    int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min,
-                   std::numeric_limits<int16_t>::min());
-  TFLITE_DCHECK_LE(params.quantized_activation_max,
-                   std::numeric_limits<int16_t>::max());
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int16_t max = std::numeric_limits<int16_t>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<int16_t>(max, params.quantized_activation_min);
-          max = std::min<int16_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int16_t>(max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
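
Note: both removed pooling kernels clamp the filter window against the input borders before accumulating, so only taps that land inside the input contribute (and, for AveragePool, only those taps are counted in the divisor). A tiny sketch of that boundary computation with hypothetical sizes:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int input_width = 8, filter_width = 3, stride = 2, pad = 1;
      const int out_x = 0;                                   // leftmost column
      const int in_x_origin = out_x * stride - pad;          // -1
      const int start = std::max(0, -in_x_origin);           //  1
      const int end = std::min(filter_width, input_width - in_x_origin);  // 3
      std::printf("filter taps [%d, %d) fall inside the input\n", start, end);
      return 0;
    }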

+ 0 - 117
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h

@@ -1,117 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
-                 int32_t input_multiplier, int32_t input_shift,
-                 const RuntimeShape& input_shape, const int8_t* input_data,
-                 const RuntimeShape& output_shape, int8_t* output_data) {
-  // Integer bits must be in sync with Prepare() function.
-  static constexpr int32_t kInputIntegerBits = 4;
-  static constexpr int32_t kOutputScale = 7;
-  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    const int32_t input =
-        static_cast<int32_t>(input_data[i]) - input_zero_point;
-    if (input <= -input_range_radius) {
-      output_data[i] = kMinInt8;
-    } else if (input >= input_range_radius) {
-      output_data[i] = kMaxInt8;
-    } else {
-      const int32_t input_in_q4 =
-          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
-      const int32_t output_in_q0 =
-          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
-
-      // Rescale and downcast.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_in_q24 =
-          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
-      output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
-      output_data[i] = static_cast<int8_t>(output_in_q24);
-    }
-  }
-}
-
-inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
-                 const RuntimeShape& input_shape, const int16_t* ptr_input_data,
-                 const RuntimeShape& output_shape, int16_t* ptr_output_data) {
-  // We reuse the sigmoid LUT, taking into account that
-  // tanh(x) = 2*sigmoid(2*x) - 1.
-
-  // We scale by 3/4 to expand the range [-8,8] to [-10.7,10.7].
-  // For a general parameter scale, the multiplier 3 is taken into account
-  // in the TanhPrepare function and is already included in
-  // input_multiplier.
-
-  if (input_multiplier == 0) {  // power of two case
-    input_multiplier = 3 << input_left_shift;
-    input_left_shift = 0;
-  }
-
-  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
-
-  int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data =
-        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
-
-    uint32_t abs_input_data = abs(input_data);
-    uint32_t uh = abs_input_data >> 8;
-    int32_t result;
-
-    if (uh >= 255) {
-      // Saturate to maximum.
-      result = 0xFFFF << 8;
-    } else {
-      uint32_t ua = sigmoid_table_uint16[uh];
-      uint32_t ub = sigmoid_table_uint16[uh + 1];
-
-      uint8_t ut = abs_input_data & 0xFF;
-
-      result = (ua << 8) + ut * (ub - ua);
-    }
-
-    result = (input_data >= 0)
-                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
-                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
-
-    // Convert back to 16-bit.
-    result >>= (9 - 1);
-
-    *ptr_output_data = result;
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
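
Note: the removed int16 Tanh reuses the sigmoid lookup table via the identity tanh(x) = 2*sigmoid(2*x) - 1. A quick floating-point check of that identity, independent of the fixed-point details above:

    #include <cmath>
    #include <cstdio>

    int main() {
      for (double x : {-2.0, -0.5, 0.0, 1.5}) {
        const double via_sigmoid = 2.0 / (1.0 + std::exp(-2.0 * x)) - 1.0;
        std::printf("x=% .1f  tanh=% .6f  2*sigmoid(2x)-1=% .6f\n",
                    x, std::tanh(x), via_sigmoid);
      }
      return 0;
    }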

+ 0 - 224
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h

@@ -1,224 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// Fixed-point per-channel-quantization transpose convolution reference kernel.
-inline void TransposeConv(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    int32_t* scratch_buffer) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
-  const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int num_elements = output_shape.FlatSize();
-  // We need to initialize scratch_buffer to all 0s, as we apply the same
-  // 'scatter'-based trick as in the float version.
-  memset(scratch_buffer, 0, num_elements * sizeof(int32_t));
-
-  // Loop through input elements one at a time.
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int in_y = 0; in_y < input_height; ++in_y) {
-      for (int in_x = 0; in_x < input_width; ++in_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          // Loop through the output elements it will influence.
-          const int out_x_origin = (in_x * stride_width) - pad_width;
-          const int out_y_origin = (in_y * stride_height) - pad_height;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int out_channel = 0; out_channel < output_depth;
-                   ++out_channel) {
-                // Compute output element location.
-                const int out_x = out_x_origin + filter_x;
-                const int out_y = out_y_origin + filter_y;
-                // We cannot accumulate out of bounds.
-                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
-                    (out_y < output_height)) {
-                  const int8_t input_value = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  const int8_t filter_value =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                        out_channel)] +=
-                      (input_value + input_offset) * filter_value;
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                              out_channel)];
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator
-template <typename Scalar>
-inline void TransposeConv(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const Scalar* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    Scalar* scratch_buffer) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
-  const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int num_elements = output_shape.FlatSize();
-  // We need to initialize scratch_buffer to all 0s, as we apply the same
-  // 'scatter'-based trick as in the float version.
-  memset(scratch_buffer, 0, num_elements * sizeof(Scalar));
-
-  // Loop through input elements one at a time.
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int in_y = 0; in_y < input_height; ++in_y) {
-      for (int in_x = 0; in_x < input_width; ++in_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          // Loop through the output elements it will influence.
-          const int out_x_origin = (in_x * stride_width) - pad_width;
-          const int out_y_origin = (in_y * stride_height) - pad_height;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int out_channel = 0; out_channel < output_depth;
-                   ++out_channel) {
-                // Compute output element location.
-                const int out_x = out_x_origin + filter_x;
-                const int out_y = out_y_origin + filter_y;
-                // We cannot accumulate out of bounds.
-                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
-                    (out_y < output_height)) {
-                  const int32_t input_value = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  const int32_t filter_value =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                        out_channel)] +=
-                      input_value * filter_value;
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
-                                             out_channel)];
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          scaled_acc = std::max(scaled_acc, output_activation_min);
-          scaled_acc = std::min(scaled_acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int16_t>(scaled_acc);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
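
Note: both removed TransposeConv variants use a "scatter" accumulation - every input element adds input*filter into each output position it influences, collected in a zero-initialized scratch buffer, with bias addition and requantization applied in a second pass. A 1-D toy version of the scatter pass (made-up sizes, no quantization):

    #include <cstdio>

    int main() {
      const int input[3] = {1, 2, 3};
      const int filter[2] = {10, 1};
      const int stride = 2;
      int scratch[6] = {0};  // output length = (3 - 1) * stride + 2 = 6
      for (int in_x = 0; in_x < 3; ++in_x) {
        for (int k = 0; k < 2; ++k) {
          scratch[in_x * stride + k] += input[in_x] * filter[k];
        }
      }
      for (int v : scratch) std::printf("%d ", v);  // prints 10 1 20 2 30 3
      std::printf("\n");
      return 0;
    }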

+ 0 - 90
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h

@@ -1,90 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
-
-#include <algorithm>
-#include <cmath>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const float* input_data,
-                            const RuntimeShape& output_shape,
-                            float* output_data, float epsilon = 1e-6) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  for (int i = 0; i < outer_size; ++i) {
-    float squared_l2_norm = 0;
-    for (int c = 0; c < depth; ++c) {
-      const float val = input_data[depth * i + c];
-      squared_l2_norm += val * val;
-    }
-    float l2_norm = std::sqrt(squared_l2_norm);
-    l2_norm = std::max(l2_norm, epsilon);
-    for (int c = 0; c < depth; ++c) {
-      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
-    }
-  }
-}
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const uint8_t* input_data,
-                            const RuntimeShape& output_shape,
-                            uint8_t* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int32_t input_zero_point = op_params.input_zero_point;
-
-  for (int i = 0; i < outer_size; ++i) {
-    int32_t square_l2_norm = 0;
-    for (int c = 0; c < depth; c++) {
-      int32_t diff = input_data[depth * i + c] - input_zero_point;
-      square_l2_norm += diff * diff;
-    }
-    int32_t inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
-                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
-    for (int c = 0; c < depth; c++) {
-      int32_t diff = input_data[depth * i + c] - input_zero_point;
-      int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
-      int32_t unclamped_output_val = 128 + rescaled_diff;
-      int32_t output_val =
-          std::min(static_cast<int32_t>(255),
-                   std::max(static_cast<int32_t>(0), unclamped_output_val));
-      output_data[depth * i + c] = static_cast<uint8_t>(output_val);
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
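
Note: the removed float L2Normalization divides each element of the innermost dimension by max(sqrt(sum of squares), epsilon); the uint8 variant does the same in fixed point via an inverse-square-root multiplier. Minimal float illustration:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      const float x[3] = {3.f, 0.f, 4.f};
      float sq = 0.f;
      for (float v : x) sq += v * v;
      const float norm = std::max(std::sqrt(sq), 1e-6f);   // 5
      for (float v : x) std::printf("%.2f ", v / norm);    // prints 0.60 0.00 0.80
      std::printf("\n");
      return 0;
    }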

+ 0 - 69
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h

@@ -1,69 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void LeakyRelu(const tflite::LeakyReluParams& params,
-                      const RuntimeShape& input_shape, const float* input_data,
-                      const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const float val = input_data[i];
-    // Note that alpha might be > 1 or < 0, so we don't use std::max here.
-    output_data[i] = val > 0 ? val : val * params.alpha;
-  }
-}
-
-template <typename T>
-inline void QuantizeLeakyRelu(const LeakyReluParams& params,
-                              const RuntimeShape& input_shape,
-                              const T* input_data,
-                              const RuntimeShape& output_shape,
-                              T* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  static const int32_t quantized_min = std::numeric_limits<T>::min();
-  static const int32_t quantized_max = std::numeric_limits<T>::max();
-  for (int i = 0; i < flat_size; ++i) {
-    const int32_t input_value = input_data[i] - params.input_offset;
-    int32_t unclamped_output;
-    if (input_value >= 0) {
-      unclamped_output = params.output_offset +
-                         MultiplyByQuantizedMultiplier(
-                             input_value, params.output_multiplier_identity,
-                             params.output_shift_identity);
-    } else {
-      unclamped_output = params.output_offset +
-                         MultiplyByQuantizedMultiplier(
-                             input_value, params.output_multiplier_alpha,
-                             params.output_shift_alpha);
-    }
-    const T clamped_output =
-        std::min(quantized_max, std::max(quantized_min, unclamped_output));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
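
Note: the removed float LeakyRelu computes f(x) = x for x > 0 and alpha*x otherwise; because alpha may be negative or greater than 1, the kernel cannot simply take std::max(x, alpha*x). A short sketch:

    #include <cstdio>

    float LeakyRelu(float x, float alpha) { return x > 0 ? x : x * alpha; }

    int main() {
      std::printf("%.2f %.2f\n", LeakyRelu(2.0f, 0.1f), LeakyRelu(-3.0f, 0.1f));
      // prints 2.00 -0.30
      return 0;
    }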

+ 0 - 256
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h

@@ -1,256 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
-
-#include <algorithm>
-#include <cstddef>
-#include <limits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void LogSoftmax(const SoftmaxParams& params,
-                       const RuntimeShape& input_shape, const float* input_data,
-                       const RuntimeShape& output_shape, float* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    // Find max element value which we'll use to ensure numerical stability
-    // taking advantage of the following equality:
-    // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
-    float max = std::numeric_limits<float>::lowest();
-    for (int c = 0; c < depth; ++c) {
-      max = std::max(max, input_data[i * depth + c]);
-    }
-
-    // Compute sum.
-    float sum = 0.f;
-    for (int c = 0; c < depth; ++c) {
-      sum += std::exp(input_data[i * depth + c] - max);
-    }
-
-    // Compute result.
-    const float log_sum = std::log(sum);
-    for (int c = 0; c < depth; ++c) {
-      output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
-    }
-  }
-}
-
-inline void LogSoftmax(const SoftmaxParams& params,
-                       const RuntimeShape& input_shape,
-                       const uint8_t* input_data,
-                       const RuntimeShape& output_shape, uint8_t* output_data) {
-  const int32_t input_multiplier = params.input_multiplier;
-  const int32_t input_left_shift = params.input_left_shift;
-  const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
-  const int32_t reverse_scaling_right_shift =
-      params.reverse_scaling_right_shift;
-  const int diff_min = params.diff_min;
-  // The representation chosen for the input to the exp() function is Q5.26.
-  // We need to leave extra space since values that we skip might be as large
-  // as -32 before multiplying by input_beta_multiplier, and therefore as
-  // large as -16 afterwards.  Note that exp(-8) is definitely not
-  // insignificant to accumulation, but exp(-16) definitely is.
-  static constexpr int kScaledDiffIntegerBits = 5;
-  static constexpr int kAccumulationIntegerBits = 12;
-  static constexpr int kOutputIntegerBits = 4;
-  using FixedPointScaledDiff =
-      gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
-  using FixedPointAccum =
-      gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
-
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    uint8_t max_in_row = 0;
-    for (int c = 0; c < depth; ++c) {
-      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
-    }
-
-    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff >= diff_min) {
-        const int32_t input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_multiplier, input_left_shift);
-        const FixedPointScaledDiff scaled_diff_f8 =
-            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
-                                        exp_on_negative_values(scaled_diff_f8));
-      }
-    }
-
-    const int32_t fixed_log_sum_of_exps =
-        log_x_for_x_greater_than_or_equal_to_1<kScaledDiffIntegerBits>(
-            sum_of_exps)
-            .raw();
-
-    // rescaled_diff_min is smallest representable in
-    // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the
-    // log-sub-exps that will be subtracted in the loop.
-    //
-    // The thresholds diff_min, etc are negative.
-    const int rescaled_diff_min =
-        fixed_log_sum_of_exps + std::numeric_limits<int32_t>::lowest();
-    const int adjusted_diff_min =
-        std::max(static_cast<int32_t>(
-                     diff_min - 1),  // Note use of > below instead of >= above.
-                 MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                     rescaled_diff_min, reverse_scaling_divisor,
-                     -reverse_scaling_right_shift));
-
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff > adjusted_diff_min) {
-        const int32_t input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_multiplier, input_left_shift);
-        int32_t unsat_output =
-            gemmlowp::RoundingDivideByPOT(
-                (input_diff_rescaled - fixed_log_sum_of_exps),
-                31 - kScaledDiffIntegerBits - kOutputIntegerBits) +
-            255;
-
-        output_data[i * depth + c] = static_cast<uint8_t>(
-            std::max(std::min(unsat_output, static_cast<int32_t>(255)),
-                     static_cast<int32_t>(0)));
-      } else {
-        // Set output to smallest value.
-        output_data[i * depth + c] = 0;
-      }
-    }
-  }
-}
-
-template <typename T>
-inline void LogSoftmaxQuantized(const SoftmaxParams& params,
-                                const size_t outer_size, const size_t depth,
-                                const RuntimeShape& input_shape,
-                                const T* input_data,
-                                const RuntimeShape& output_shape,
-                                T* output_data) {
-  const int32_t input_multiplier = params.input_multiplier;
-  const int32_t input_left_shift = params.input_left_shift;
-  const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
-  const int32_t reverse_scaling_right_shift =
-      params.reverse_scaling_right_shift;
-  const int diff_min = params.diff_min;
-
-  static constexpr T kMinT8 = std::numeric_limits<T>::min();
-  static constexpr T kMaxT8 = std::numeric_limits<T>::max();
-  static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();
-
-  // All IntegerBits must agree with Prepare function.
-  // Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
-  static constexpr int kInputIntegerBits = 5;
-  static constexpr int kAccumulationIntegerBits = 12;
-  static constexpr int kOutputIntegerBits = 4;
-  using F5 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-  using F12 = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
-
-  for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) {
-    T max_in_row = kMinT8;
-    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
-      max_in_row =
-          std::max(max_in_row, input_data[outer_index * depth + inner_index]);
-    }
-
-    // Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps.
-    F12 sum_of_exps_in_q12 = F12::FromRaw(0);
-    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
-          max_in_row;
-      if (input_diff >= diff_min) {
-        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
-            input_diff, input_multiplier, input_left_shift);
-        sum_of_exps_in_q12 =
-            sum_of_exps_in_q12 +
-            gemmlowp::Rescale<kAccumulationIntegerBits>(
-                exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
-      }
-    }
-
-    const int32_t log_sum_of_exps_in_q5 =
-        log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
-            sum_of_exps_in_q12)
-            .raw();
-
-    // This potentially reduces the valid range: shifted_log_sum_of_exps_in_q5
-    // is the smallest value representable in Q5.26 plus the log_sum_of_exps.
-    const int32_t shifted_log_sum_of_exps_in_q5 =
-        log_sum_of_exps_in_q5 + kMinInt32;
-    const int32_t adjusted_diff_min =
-        std::max(static_cast<int32_t>(diff_min - 1),
-                 MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
-                                               reverse_scaling_divisor,
-                                               -reverse_scaling_right_shift));
-
-    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
-          max_in_row;
-      // Note use of > below instead of >= above.
-      if (input_diff > adjusted_diff_min) {
-        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
-            input_diff, input_multiplier, input_left_shift);
-
-        // Rescale and downcast.
-        int32_t output_in_q27 =
-            gemmlowp::RoundingDivideByPOT(
-                (input_diff_in_q5 - log_sum_of_exps_in_q5),
-                31 - kInputIntegerBits - kOutputIntegerBits) +
-            kMaxT8;
-
-        output_in_q27 =
-            std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxT8)),
-                     static_cast<int32_t>(kMinT8));
-        output_data[outer_index * depth + inner_index] =
-            static_cast<T>(output_in_q27);
-      } else {
-        output_data[outer_index * depth + inner_index] = kMinT8;
-      }
-    }
-  }
-}
-
-inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size,
-                       const size_t depth, const RuntimeShape& input_shape,
-                       const int8_t* input_data,
-                       const RuntimeShape& output_shape, int8_t* output_data) {
-  LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data,
-                      output_shape, output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
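
Note: the removed float LogSoftmax relies on the max-shift identity log(exp(x_i)/sum_j exp(x_j)) = (x_i - max) - log(sum_j exp(x_j - max)) for numerical stability; the quantized variants implement the same idea in fixed point. Small float demonstration:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      const float x[3] = {1.f, 2.f, 3.f};
      const float max = *std::max_element(x, x + 3);
      float sum = 0.f;
      for (float v : x) sum += std::exp(v - max);
      const float log_sum = std::log(sum);
      for (float v : x) std::printf("%.4f ", v - max - log_sum);
      std::printf("\n");  // roughly -2.4076 -1.4076 -0.4076
      return 0;
    }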

+ 0 - 132
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h

@@ -1,132 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
-
-#include <cmath>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
-                     const RuntimeShape& output_shape, float* output_data) {
-  const float cutoff_upper = 16.619047164916992188f;
-  const float cutoff_lower = -9.f;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // Rationale for using an approximation in the reference kernel:
-  // 0. This approximation gives enough precision for float.
-  // 1. It works around an issue on an embedded chipset where exp() does not
-  // behave as expected - exp(x) should return inf on overflow, not 1.701417;
-  // IEEE 754 defines a representation for inf.
-  // 2. It speeds up the calculation and matches the behavior of the
-  // optimized kernels. (See the definition of scalar_logistic_op<float>.)
-
-  for (int i = 0; i < flat_size; i++) {
-    float val = input_data[i];
-    float result;
-    if (val > cutoff_upper) {
-      result = 1.0f;
-    } else if (val < cutoff_lower) {
-      result = std::exp(val);
-    } else {
-      result = 1.f / (1.f + std::exp(-val));
-    }
-    output_data[i] = result;
-  }
-}
-
-// Convenience version that allows, for example, generated-code calls to be
-// uniform between data types.
-inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
-                     const float* input_data, const RuntimeShape& output_shape,
-                     float* output_data) {
-  // Drop params: not needed.
-  Logistic(input_shape, input_data, output_shape, output_data);
-}
-
-inline void Logistic(const LogisticParams& params,
-                     const RuntimeShape& input_shape, const int16_t* input_data,
-                     const RuntimeShape& output_shape, int16_t* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    // This is the return type of math functions such as tanh, logistic,
-    // whose range is in [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-    // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
-    using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-
-    const F3 input = F3::FromRaw(input_data[i]);
-    F0 output = gemmlowp::logistic(input);
-    output_data[i] = output.raw();
-  }
-}
-
-// Quantized int8_t logistic activation.  Cheats by dequantizing and
-// requantizing around the floating point logistic method.  This implementation
-// is slow on platforms without a floating point unit.
-
-// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
-// approach used in TFLite for int8_t Logistic.
-inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
-                     float input_scale, int input_zero_point,
-                     const RuntimeShape& output_shape, int8_t* output_data,
-                     float output_scale, int output_zero_point) {
-  const float cutoff_upper = 16.619047164916992188f;
-  const float cutoff_lower = -9.f;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // Rationale for using the approximation in the reference kernel:
-  // 0. This approximation gives enough precision for float.
-  // 1. It works around an issue on an embedded chipset where exp() does not
-  // return the expected result: exp(x) should return inf when it overflows,
-  // not 1.701417 (IEEE 754 defines a representation for inf).
-  // 2. It speeds up the calculation and matches the behavior of the
-  // optimized kernels (see the definition of scalar_logistic_op<float>).
-
-  for (int i = 0; i < flat_size; i++) {
-    // Dequantize.
-    float val =
-        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
-    float result;
-    if (val > cutoff_upper) {
-      result = 1.0f;
-    } else if (val < cutoff_lower) {
-      result = std::exp(val);
-    } else {
-      result = 1.f / (1.f + std::exp(-val));
-    }
-    // Requantize
-    int8_t output =
-        static_cast<int8_t>(result / output_scale + output_zero_point);
-    output_data[i] = output;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
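For orientation, a minimal sketch of calling the float Logistic reference kernel removed above; it assumes the reference_ops headers remain available from upstream TensorFlow Lite Micro, and the tensor values are illustrative only.

#include "tensorflow/lite/kernels/internal/reference/logistic.h"

void LogisticSketch() {
  const float input[4] = {-10.f, -1.f, 0.f, 20.f};
  float output[4] = {};
  const tflite::RuntimeShape shape({4});  // flat tensor of four elements
  tflite::reference_ops::Logistic(shape, input, shape, output);
  // output ~= {4.5e-5, 0.269, 0.5, 1.0}: inputs below cutoff_lower use
  // std::exp(val) directly, inputs above cutoff_upper saturate to 1.0f.
}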

+ 0 - 422
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h

@@ -1,422 +0,0 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
-#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void LstmCell(
-    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
-    const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
-    const float* prev_activ_data, const RuntimeShape& weights_shape,
-    const float* weights_data, const RuntimeShape& unextended_bias_shape,
-    const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
-    const float* prev_state_data,
-    const RuntimeShape& unextended_output_state_shape, float* output_state_data,
-    const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
-    const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
-    const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape prev_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
-  const RuntimeShape bias_shape =
-      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
-  const RuntimeShape prev_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
-  const RuntimeShape output_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
-  const RuntimeShape output_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
-  const RuntimeShape concat_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
-  const RuntimeShape activ_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int batches =
-      MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
-                  output_state_shape, 0, output_activ_shape, 0);
-  const int height =
-      MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
-                  output_state_shape, 1, output_activ_shape, 1);
-  const int width =
-      MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
-                  output_state_shape, 2, output_activ_shape, 2);
-  const int input_depth = input_shape.Dims(3);
-  const int prev_activ_depth = prev_activ_shape.Dims(3);
-  const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
-                   total_input_depth);
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
-  const int intern_activ_depth =
-      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
-  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
-                   intern_activ_depth * total_input_depth);
-  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
-  const int output_depth =
-      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
-                  3, output_activ_shape, 3);
-  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
-
-  // Concatenate prev_activ and input data together
-  float const* concat_input_arrays_data[2] = {input_data, prev_activ_data};
-  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
-                                                       &prev_activ_shape};
-  tflite::ConcatenationParams concat_params;
-  concat_params.axis = 3;
-  concat_params.inputs_count = 2;
-  Concatenation(concat_params, concat_input_arrays_shapes,
-                concat_input_arrays_data, concat_temp_shape, concat_temp_data);
-
-  // Fully connected
-  tflite::FullyConnectedParams fc_params;
-  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
-  fc_params.float_activation_max = std::numeric_limits<float>::max();
-  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
-                 weights_data, bias_shape, bias_data, activ_temp_shape,
-                 activ_temp_data);
-
-  // Memory state update (the LSTM "guts")
-  for (int b = 0; b < batches; ++b) {
-    for (int w = 0; w < width; ++w) {
-      for (int h = 0; h < height; ++h) {
-        for (int c = 0; c < output_depth; ++c) {
-          const float input_gate =
-              1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
-                                                      0 * output_depth + c)]));
-          const float new_input = std::tanh(activ_temp_data[Offset(
-              activ_temp_shape, b, h, w, 1 * output_depth + c)]);
-          const float forget_gate =
-              1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
-                                                      2 * output_depth + c)]));
-          const float output_gate =
-              1.f /
-              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
-                                                      3 * output_depth + c)]));
-          const float new_state =
-              input_gate * new_input +
-              forget_gate *
-                  prev_state_data[Offset(prev_state_shape, b, h, w, c)];
-          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
-          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
-              output_gate * std::tanh(new_state);
-        }
-      }
-    }
-  }
-}
-
-// Quantized LSTM cell implementation.
-// The quantization of the input, output arrays is as follows:
-//  - The input activations are quantized as uint8 on the interval
-//    [-1, 127/128].
-//    The rationale is that this is the natural interval for output
-//    activations (see next point) and these need to be concatenated together.
-//    We could accommodate different ranges by re-scaling, but we empirically
-//    found that setting the input activations range to be [-1, 127/128] in the
-//    first place, removing the need for re-scaling, greatly improves accuracy.
-//  - The output activations are quantized as uint8 on the interval
-//    [-1, 127/128].
-//    The rationale for that is that the definition of an LSTM cell makes them
-//    intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
-//    makes for simpler, more accurate fixed-point arithmetic.
-//  - The output-at-previous-timestep state array is obviously quantized as
-//    the output activations.
-//  - The internal LSTM memory (not the output-at-previous-timestep, the other
-//    internal state array) is int16-quantized and may use any power-of-two,
-//    symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
-//    StateIntegerBits below, see the below discussion of that template
-//    parameter ("The StateIntegerBits template parameter").
-//  - The output of the internal fully-connected node is int16-quantized
-//    on the interval [-8, 8 * 32767/32768], the rationale for which is
-//    explained just below ("Why [-8, 8] for fully-connected output?").
-//
-//
-// === The StateIntegerBits template parameter ===
-//
-// The StateIntegerBits template parameter controls the fixed-point format used
-// to represent the internal memory of the LSTM cell (not the
-// output-at-previous-timestep, the other internal state array). It's currently
-// a template parameter so that the model can control that. The most typical
-// value for StateIntegerBits is 4. Other plausible values are anywhere between
-// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
-// and drop that template parameter. The reason why it can't be a runtime
-// parameter is that this controls the fixed-point format used, i.e. we need to
-// generate actually different code based on it. In particular, we generate code
-// for a fixed-point tanh() implementation for that format, which internally
-// uses a fixed-point exp() implementation, which internally uses a
-// barrel-shifter with a number of steps that depends on StateIntegerBits.
-// Another consequence of that is that a higher value of StateIntegerBits
-// results in a more expensive implementation (more barrel shifter steps
-// needed).
-//
-//
-// === Why [-8, 8] for fully-connected output? ===
-//
-// This array is only fed to Logistic and Tanh functions, for which
-// the quantized implementation will want to use fixed-point arithmetic,
-// requiring a power-of-two representation interval. Thus, we should right
-// away quantize this array to a power-of-two interval; otherwise,
-// implementation will need to rescale that, losing any benefit that a tighter
-// representation interval might otherwise yield, while introducing some
-// numerical error and computational overhead.
-//
-// Now, Logistic and Tanh
-// are nearly constant (nearly equal to their horizontal asymptotes)
-// outside of a small bounded interval around 0:
-//
-//   Logistic(4) = 1 - 1.8e-2     Tanh(4) = 1 - 6.7e-4
-//   Logistic(8) = 1 - 3.4e-4     Tanh(8) = 1 - 2.3e-7
-//   Logistic(16) = 1 - 1.1e-7    Tanh(16) = 1 - 2.5e-14
-//
-// From this, we see that clamping to [-4, 4] would be too inaccurate
-// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
-// while clamping to [-16, 16] would make no difference even in float32.
-// However, for a fixed-point implementation in 16-bit integers, using 5
-// integer bits to represent the [-16, 16] range would leave only 11
-// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
-// representable values. Notice that this is higher than the
-// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
-// Using [-8, 8] thus seems like the better compromise overall, enjoying
-// an increment of 2.4e-4 between representable values and a worst-case
-// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
-// [-16, 16].
-//
-// Moreover, all other things being equal, it is nice to choose the narrower
-// representation range, as that makes the implementation of fixed-point
-// math functions a little cheaper (each integer bit requires an additional
-// barrel-shifter step in the implementation of exp(-x)). That is further
-// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
-// sense for 32-bit float or 32-bit fixed-point quantization, but we are
-// aiming for 16-bit fixed-point quantization of these internal nodes here.
-//
-template <int StateIntegerBits>
-inline void LstmCell(const LstmCellParams& params,
-                     const RuntimeShape& unextended_input_shape,
-                     const uint8_t* input_data_uint8,
-                     const RuntimeShape& unextended_prev_activ_shape,
-                     const uint8_t* prev_activ_data_uint8,
-                     const RuntimeShape& weights_shape,
-                     const uint8_t* weights_data_uint8,
-                     const RuntimeShape& unextended_bias_shape,
-                     const int32_t* bias_data_int32,
-                     const RuntimeShape& unextended_prev_state_shape,
-                     const int16_t* prev_state_data_int16,
-                     const RuntimeShape& unextended_output_state_shape,
-                     int16_t* output_state_data_int16,
-                     const RuntimeShape& unextended_output_activ_shape,
-                     uint8_t* output_activ_data_uint8,
-                     const RuntimeShape& unextended_concat_temp_shape,
-                     uint8_t* concat_temp_data_uint8,
-                     const RuntimeShape& unextended_activ_temp_shape,
-                     int16_t* activ_temp_data_int16, void* gemmlowp_context) {
-  (void)gemmlowp_context;  // only used in optimized code.
-  int32_t weights_zero_point = params.weights_zero_point;
-  int32_t accum_multiplier = params.accum_multiplier;
-  int accum_shift = params.accum_shift;
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape prev_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
-  const RuntimeShape bias_shape =
-      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
-  const RuntimeShape prev_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
-  const RuntimeShape output_state_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
-  const RuntimeShape output_activ_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
-  const RuntimeShape concat_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
-  const RuntimeShape activ_temp_shape =
-      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-
-  // Gather dimensions information, and perform consistency checks.
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int outer_size = MatchingFlatSizeSkipDim(
-      input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
-      output_activ_shape);
-  const int input_depth = input_shape.Dims(3);
-  const int prev_activ_depth = prev_activ_shape.Dims(3);
-  const int total_input_depth = prev_activ_depth + input_depth;
-  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
-                   total_input_depth);
-  const int intern_activ_depth =
-      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
-  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
-                   intern_activ_depth * total_input_depth);
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
-  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
-  const int output_depth =
-      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
-                  3, output_activ_shape, 3);
-  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
-  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
-  const int fc_output_depth =
-      MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
-  const int fc_accum_depth = total_input_depth;
-  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
-
-  // Depth-concatenate prev_activ and input data together.
-  uint8_t const* concat_input_arrays_data[2] = {input_data_uint8,
-                                                prev_activ_data_uint8};
-  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
-                                                       &prev_activ_shape};
-  tflite::ConcatenationParams concat_params;
-  concat_params.axis = 3;
-  concat_params.inputs_count = 2;
-  Concatenation(concat_params, concat_input_arrays_shapes,
-                concat_input_arrays_data, concat_temp_shape,
-                concat_temp_data_uint8);
-
-  // Implementation of the fully connected node inside the LSTM cell.
-  // The operands are 8-bit integers, the accumulators are internally 32bit
-  // integers, and the output is 16-bit fixed-point with 3 integer bits so
-  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
-  // is explained in the function comment above.
-  for (int b = 0; b < fc_batches; ++b) {
-    for (int out_c = 0; out_c < fc_output_depth; ++out_c) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum = bias_data_int32[out_c];
-      // Accumulation loop.
-      for (int d = 0; d < fc_accum_depth; ++d) {
-        int16_t input_val =
-            concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
-        int16_t weights_val =
-            weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
-        accum += input_val * weights_val;
-      }
-      // Down-scale the final int32 accumulator to the scale used by our
-      // (16-bit, using 3 integer bits) fixed-point format. The quantized
-      // multiplier and shift here have been pre-computed offline
-      // (e.g. by toco).
-      accum =
-          MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
-      // Saturate, cast to int16, and store to the temporary activations array.
-      accum = std::max(-32768, std::min(32767, accum));
-      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
-    }
-  }
-
-  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
-  // and muls, all done in 16-bit fixed-point.
-  for (int b = 0; b < outer_size; ++b) {
-    for (int c = 0; c < output_depth; ++c) {
-      // Define the fixed-point data types that we will use here. All use
-      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
-      // They only differ by the number of integral vs. fractional bits,
-      // determining the range of values that they can represent.
-      //
-      // F0 uses 0 integer bits, range [-1, 1].
-      // This is the return type of math functions such as tanh, logistic,
-      // whose range is in [-1, 1].
-      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-      // F3 uses 3 integer bits, range [-8, 8].
-      // This is the range of the previous fully-connected node's output,
-      // which is our input here.
-      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
-      // 2^StateIntegerBits]. It's used to represent the internal state, whose
-      // number of integer bits is currently dictated by the model. See comment
-      // on the StateIntegerBits template parameter above.
-      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
-      // Implementation of input gate, using fixed-point logistic function.
-      F3 input_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
-      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
-      // Implementation of input modulation gate, using fixed-point tanh
-      // function.
-      F3 input_modulation_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
-      F0 input_modulation_gate_output =
-          gemmlowp::tanh(input_modulation_gate_input);
-      // Implementation of forget gate, using fixed-point logistic function.
-      F3 forget_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
-      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
-      // Implementation of output gate, using fixed-point logistic function.
-      F3 output_gate_input = F3::FromRaw(
-          activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
-      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
-      // Implementation of internal multiplication nodes, still in fixed-point.
-      F0 input_times_input_modulation =
-          input_gate_output * input_modulation_gate_output;
-      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
-      FS prev_state_times_forget_state = forget_gate_output * prev_state;
-      // Implementation of internal addition node, saturating.
-      FS new_state = gemmlowp::SaturatingAdd(
-          gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
-          prev_state_times_forget_state);
-      // Implementation of last internal Tanh node, still in fixed-point.
-      // Since a Tanh fixed-point implementation is specialized for a given
-      // number of integer bits, and each specialization can have a substantial
-      // code size, and we already used above a Tanh on an input with 3 integer
-      // bits, and per the table in the above function comment there is no
-      // significant accuracy to be lost by clamping to [-8, +8] for a
-      // 3-integer-bits representation, let us just do that. This helps people
-      // porting this to targets where code footprint must be minimized.
-      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
-      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
-      // Store the new internal state back to memory, as 16-bit integers.
-      // Note: here we store the original value with StateIntegerBits, not
-      // the rescaled 3-integer-bits value fed to tanh.
-      output_state_data_int16[b * output_depth + c] = new_state.raw();
-      // Down-scale the output activations to 8-bit integers, saturating,
-      // and store back to memory.
-      int16_t rescaled_output_activ =
-          gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
-      int16_t clamped_output_activ = std::max<int16_t>(
-          -128, std::min<int16_t>(127, rescaled_output_activ));
-      output_activ_data_uint8[b * output_depth + c] =
-          128 + clamped_output_activ;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
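The long comment above argues for a [-8, 8] interval over [-16, 16] for the fully-connected output; a small standalone check of that arithmetic (illustrative only, not part of the removed file):

#include <cmath>
#include <cstdio>

int main() {
  // gemmlowp::FixedPoint<int16_t, N> covers roughly [-2^N, 2^N) with a step
  // size of 2^(N - 15).
  const double step_8 = std::ldexp(1.0, 3 - 15);   // [-8, 8]   -> ~2.4e-4
  const double step_16 = std::ldexp(1.0, 4 - 15);  // [-16, 16] -> ~4.9e-4
  // Worst-case clamping error when logistic inputs are clipped to [-8, 8].
  const double clamp_err = 1.0 - 1.0 / (1.0 + std::exp(-8.0));  // ~3.4e-4
  std::printf("step[-8,8]=%g  step[-16,16]=%g  clamp_err=%g\n",
              step_8, step_16, clamp_err);
  return 0;
}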

+ 0 - 64
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h

@@ -1,64 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T, typename Op, int N = 5>
-void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
-                                 const T* input1_data,
-                                 const RuntimeShape& unextended_input2_shape,
-                                 const T* input2_data,
-                                 const RuntimeShape& unextended_output_shape,
-                                 T* output_data, Op op) {
-  // Uses element-wise calculation if broadcast is not required.
-  if (unextended_input1_shape == unextended_input2_shape) {
-    const int flat_size =
-        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
-                             unextended_output_shape);
-    for (int i = 0; i < flat_size; ++i) {
-      output_data[i] = op(input1_data[i], input2_data[i]);
-    }
-  } else {
-    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-    NdArrayDesc<N> desc1;
-    NdArrayDesc<N> desc2;
-    NdArrayDesc<N> output_desc;
-    NdArrayDescsForElementwiseBroadcast(
-        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
-    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                   &output_desc);
-
-    auto maxmin_func = [&](int indexes[N]) {
-      output_data[SubscriptToIndex(output_desc, indexes)] =
-          op(input1_data[SubscriptToIndex(desc1, indexes)],
-             input2_data[SubscriptToIndex(desc2, indexes)]);
-    };
-    NDOpsHelper<N>(output_desc, maxmin_func);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
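A minimal sketch of calling the broadcast helper removed above for an element-wise int32 maximum (equal shapes, so the fast path is taken); it assumes the header remains available upstream and the values are illustrative:

#include <algorithm>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"

void MaxSketch() {
  const int32_t a[4] = {1, 5, -2, 7};
  const int32_t b[4] = {3, 2, 0, 7};
  int32_t out[4] = {};
  const tflite::RuntimeShape shape({4});
  tflite::reference_ops::MaximumMinimumBroadcastSlow(
      shape, a, shape, b, shape, out,
      [](int32_t x, int32_t y) { return std::max(x, y); });
  // out = {3, 5, 0, 7}; with differing input shapes the NdArrayDesc
  // broadcast path would be used instead.
}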

+ 0 - 37
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h

@@ -1,37 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void Negate(const RuntimeShape& input_shape, const T* input_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = -input_data[i];
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
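For completeness, a one-line sketch of the Negate kernel removed above (header assumed to remain available upstream; values illustrative):

#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/neg.h"

void NegateSketch() {
  const int32_t input[3] = {1, -2, 3};
  int32_t output[3] = {};
  const tflite::RuntimeShape shape({3});
  tflite::reference_ops::Negate(shape, input, shape, output);
  // output = {-1, 2, -3}
}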

+ 0 - 169
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h

@@ -1,169 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
-
-#include <vector>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// TFLite Pad supports activation tensors with up to 5 dimensions.
-constexpr int PadKernelMaxDimensionCount() { return 5; }
-
-// There are two versions of pad: Pad and PadV2.  In PadV2 there is a second
-// scalar input that provides the padding value.  Therefore pad_value_ptr can be
-// equivalent to a simple input1_data.  For Pad, it should point to a zero
-// value.
-//
-// Note that two typenames are required, so that T=P=int32_t is considered a
-// specialization distinct from P=int32_t.
-template <typename T, typename P>
-inline void PadImpl(const tflite::PadParams& op_params,
-                    const RuntimeShape& input_shape, const T* input_data,
-                    const P* pad_value_ptr, const RuntimeShape& output_shape,
-                    T* output_data) {
-  const RuntimeShape ext_input_shape =
-      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
-  const RuntimeShape ext_output_shape =
-      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
-  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
-  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
-
-  // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
-  // pad them to 5 dims (yes, we are "padding the padding").
-  int left_padding_copy[PadKernelMaxDimensionCount()];
-  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
-    left_padding_copy[i] = 0;
-  }
-  for (int i = 0; i < op_params.left_padding_count; ++i) {
-    left_padding_copy[i + PadKernelMaxDimensionCount() -
-                      op_params.left_padding_count] = op_params.left_padding[i];
-  }
-  int right_padding_copy[PadKernelMaxDimensionCount()];
-  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
-    right_padding_copy[i] = 0;
-  }
-  for (int i = 0; i < op_params.right_padding_count; ++i) {
-    right_padding_copy[i + PadKernelMaxDimensionCount() -
-                       op_params.right_padding_count] =
-        op_params.right_padding[i];
-  }
-
-  const int output_batch = ext_output_shape.Dims(0);
-  const int output_plane = ext_output_shape.Dims(1);
-  const int output_height = ext_output_shape.Dims(2);
-  const int output_width = ext_output_shape.Dims(3);
-  const int output_depth = ext_output_shape.Dims(4);
-
-  const int left_b_padding = left_padding_copy[0];
-  const int left_p_padding = left_padding_copy[1];
-  const int left_h_padding = left_padding_copy[2];
-  const int left_w_padding = left_padding_copy[3];
-  const int left_d_padding = left_padding_copy[4];
-
-  const int right_b_padding = right_padding_copy[0];
-  const int right_p_padding = right_padding_copy[1];
-  const int right_h_padding = right_padding_copy[2];
-  const int right_w_padding = right_padding_copy[3];
-  const int right_d_padding = right_padding_copy[4];
-
-  const T pad_value = *pad_value_ptr;
-
-  const T* in_ptr = input_data;
-  T* out_ptr = output_data;
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_p = 0; out_p < output_plane; ++out_p) {
-      for (int out_h = 0; out_h < output_height; ++out_h) {
-        for (int out_w = 0; out_w < output_width; ++out_w) {
-          for (int out_d = 0; out_d < output_depth; ++out_d) {
-            if (out_b < left_b_padding ||
-                out_b >= output_batch - right_b_padding ||
-                out_p < left_p_padding ||
-                out_p >= output_plane - right_p_padding ||
-                out_h < left_h_padding ||
-                out_h >= output_height - right_h_padding ||
-                out_w < left_w_padding ||
-                out_w >= output_width - right_w_padding ||
-                out_d < left_d_padding ||
-                out_d >= output_depth - right_d_padding) {
-              *out_ptr++ = pad_value;
-            } else {
-              *out_ptr++ = *in_ptr++;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename T, typename P>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const T* input_data,
-                const P* pad_value_ptr, const RuntimeShape& output_shape,
-                T* output_data) {
-  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-          output_data);
-}
-
-// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
-template <typename T>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const T* input_data,
-                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
-                T* output_data) {
-  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
-  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
-          output_shape, output_data);
-}
-
-// This version avoids conflicting template matching.
-template <>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const int32_t* input_data,
-                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
-                int32_t* output_data) {
-  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-          output_data);
-}
-
-template <typename T, typename P>
-inline void PadImageStyle(const tflite::PadParams& op_params,
-                          const RuntimeShape& input_shape, const T* input_data,
-                          const P* pad_value_ptr,
-                          const RuntimeShape& output_shape, T* output_data) {
-  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-      output_data);
-}
-
-template <typename P>
-inline void PadImageStyle(const tflite::PadParams& op_params,
-                          const RuntimeShape& input_shape,
-                          const float* input_data, const P* pad_value_ptr,
-                          const RuntimeShape& output_shape,
-                          float* output_data) {
-  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-      output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
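A minimal sketch of the removed reference Pad: one zero of padding on each side of a flat 3-element float tensor (header and PadParams layout assumed from upstream TFLM; values illustrative):

#include "tensorflow/lite/kernels/internal/reference/pad.h"

void PadSketch() {
  tflite::PadParams op_params;
  op_params.left_padding_count = 1;
  op_params.left_padding[0] = 1;
  op_params.right_padding_count = 1;
  op_params.right_padding[0] = 1;
  const float input[3] = {1.f, 2.f, 3.f};
  const float pad_value = 0.f;  // Pad (as opposed to PadV2) pads with zero
  float output[5] = {};
  tflite::reference_ops::Pad(op_params, tflite::RuntimeShape({3}), input,
                             &pad_value, tflite::RuntimeShape({5}), output);
  // output = {0, 1, 2, 3, 0}; internally both shapes are extended to the
  // 5-dimensional form used by PadImpl.
}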

+ 0 - 303
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h

@@ -1,303 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
-
-#include <algorithm>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const float* input_data,
-                        const RuntimeShape& output_shape, float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float total = 0.f;
-          float filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              total +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          const float average = total / filter_count;
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(average, params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline bool AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const uint8_t* input_data,
-                        const RuntimeShape& output_shape,
-                        uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          if (filter_count == 0) return false;
-          acc = (acc + filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<uint8_t>(acc);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
-                   const float* input_data, const RuntimeShape& output_shape,
-                   float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float sum_squares = 0.f;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              const float val =
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              sum_squares += val * val;
-              filter_count++;
-            }
-          }
-          const float l2pool_result = std::sqrt(sum_squares / filter_count);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(l2pool_result,
-                                           params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const float* input_data, const RuntimeShape& output_shape,
-                    float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float max = std::numeric_limits<float>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(max, params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const uint8_t* input_data, const RuntimeShape& output_shape,
-                    uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
-  TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          uint8_t max = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<uint8_t>(max, params.quantized_activation_min);
-          max = std::min<uint8_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<uint8_t>(max);
-        }
-      }
-    }
-  }
-}
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
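A minimal sketch of the removed float AveragePool: a single 2x2 window over a 1x2x2x1 tensor (header and PoolParams layout assumed from upstream TFLM; values illustrative):

#include <limits>

#include "tensorflow/lite/kernels/internal/reference/pooling.h"

void AveragePoolSketch() {
  tflite::PoolParams params;
  params.stride_height = 1;
  params.stride_width = 1;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();
  const float input[4] = {1.f, 2.f, 3.f, 4.f};  // NHWC: 1 x 2 x 2 x 1
  float output[1] = {};
  tflite::reference_ops::AveragePool(params,
                                     tflite::RuntimeShape({1, 2, 2, 1}), input,
                                     tflite::RuntimeShape({1, 1, 1, 1}),
                                     output);
  // output[0] = 2.5f; AveragePool returns false only if a pooling window
  // ends up empty.
}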

+ 0 - 809
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc

@@ -1,809 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <limits>
-#include <utility>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-namespace tensor_utils {
-
-namespace {
-const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
-const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
-}  // namespace
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float* min_value,
-                                     float* max_value, float* scaling_factor) {
-  auto minmax = std::minmax_element(values, values + size);
-  *min_value = *minmax.first;
-  *max_value = *minmax.second;
-
-  PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value,
-                                  *max_value, scaling_factor);
-}
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float min_value,
-                                     float max_value, float* scaling_factor) {
-  const int32_t kScale = 127;
-  const float range = std::max(std::abs(min_value), std::abs(max_value));
-  if (range == 0) {
-    memset(quantized_values, 0, size * sizeof(int8_t));
-    *scaling_factor = 1;
-    return;
-  }
-  *scaling_factor = range / kScale;
-  const float scaling_factor_inv = kScale / range;
-  for (int i = 0; i < size; ++i) {
-    const int32_t quantized_value =
-        static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
-    // Clamp: just in case some odd numeric offset.
-    quantized_values[i] = static_cast<int8_t>(
-        std::min(kScale, std::max(-kScale, quantized_value)));
-  }
-}
-
-void PortableAsymmetricQuantizeFloats(const float* values, const int size,
-                                      int8_t* quantized_values,
-                                      float* scaling_factor, int32_t* offset) {
-  const int32_t kMinScale = -128;
-  const int32_t kMaxScale = 127;
-  const double qmin_double = kMinScale;
-  const double qmax_double = kMaxScale;
-  const auto minmax = std::minmax_element(values, values + size);
-  const double rmin = static_cast<double>(std::min(0.0f, *minmax.first));
-  const double rmax = static_cast<double>(std::max(0.0f, *minmax.second));
-  if (rmin == rmax) {
-    memset(quantized_values, 0, size * sizeof(int8_t));
-    *scaling_factor = 1;
-    *offset = 0;
-    return;
-  } else {
-    double scale = (rmax - rmin) / (qmax_double - qmin_double);
-    const double zero_point_from_min = qmin_double - rmin / scale;
-    const double zero_point_from_max = qmax_double - rmax / scale;
-    const double zero_point_from_min_error =
-        std::abs(qmin_double) + std::abs(rmin / scale);
-    const double zero_point_from_max_error =
-        std::abs(qmax_double) + std::abs(rmax / scale);
-    const double zero_point_double =
-        zero_point_from_min_error < zero_point_from_max_error
-            ? zero_point_from_min
-            : zero_point_from_max;
-    int8_t nudged_zero_point = 0;
-    if (zero_point_double <= qmin_double) {
-      nudged_zero_point = kMinScale;
-    } else if (zero_point_double >= qmax_double) {
-      nudged_zero_point = kMaxScale;
-    } else {
-      nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
-    }
-    *scaling_factor = scale;
-    *offset = nudged_zero_point;
-  }
-  const float scaling_factor_inv = 1.0f / *scaling_factor;
-  for (int i = 0; i < size; ++i) {
-    const int32_t quantized_value = static_cast<int32_t>(
-        TfLiteRound(*offset + values[i] * scaling_factor_inv));
-    quantized_values[i] =
-        std::min(kMaxScale, std::max(kMinScale, quantized_value));
-  }
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
-                                                 int m_rows, int m_cols,
-                                                 const float* vector,
-                                                 int n_batch, float* result) {
-  float* result_in_batch = result;
-  for (int b = 0; b < n_batch; b++) {
-    const float* matrix_ptr = matrix;
-    for (int r = 0; r < m_rows; r++) {
-      float dot_prod = 0.0f;
-      const float* vector_in_batch = vector + b * m_cols;
-      for (int c = 0; c < m_cols; c++) {
-        dot_prod += *matrix_ptr++ * *vector_in_batch++;
-      }
-      *result_in_batch += dot_prod;
-      ++result_in_batch;
-    }
-  }
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result) {
-  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
-    const float batch_scaling_factor = scaling_factors[batch];
-    // Get the address of the first row.
-    const int8_t* row_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      // Initialize the dot product sum for the row to 0.
-      int32_t dotprod = 0;
-#if defined(__GNUC__)
-      // Prefetch the row to cache.
-      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
-                         3 /* temporal locality */);
-#endif
-      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
-        dotprod += (*row_ptr) * (vectors[col]);
-      }  // for col
-      *result += dotprod * batch_scaling_factor;
-      ++result;
-    }  // for row
-  }    // for batch
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result, const float* per_channel_scale,
-    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
-    bool* compute_row_sums, CpuBackendContext* context) {
-  if (input_offset == nullptr) {
-    PortableMatrixBatchVectorMultiplyAccumulate(
-        matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
-    return;
-  }
-  if (!compute_row_sums || *compute_row_sums) {
-    PortableReductionSumVector(matrix, row_sums, m_rows, m_cols);
-    if (compute_row_sums) {
-      *compute_row_sums = false;
-    }
-  }
-
-  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
-    const float batch_scaling_factor = scaling_factors[batch];
-    const int32_t batch_offset = input_offset[batch];
-    const int8_t* row_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      int32_t dotprod = 0;
-      float scale = batch_scaling_factor;
-      if (per_channel_scale) {
-        scale *= per_channel_scale[row];
-      }
-#if defined(__GNUC__)
-      // Prefetch the row to cache.
-      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
-                         3 /* temporal locality */);
-#endif
-      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
-        dotprod += (*row_ptr) * vectors[col];
-      }  // for col
-      dotprod -= row_sums[row] * batch_offset;
-      *result += dotprod * scale;
-      ++result;
-    }  // for row
-  }    // for batch
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
-  const int kBlockSize = 4;
-  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; batch++) {
-    const float* matrix_ptr = matrix;
-    for (int row = 0; row < m_rows; row++) {
-      float dot_prod = 0.0f;
-      const float* vector_in_batch = vector + batch * m_cols;
-      for (int i = segments[row]; i < segments[row + 1]; i++) {
-        const int block_start_index = indices[i] * kBlockSize;
-        const float* vector_block_in_batch_ptr =
-            vector_in_batch + block_start_index;
-        for (int c = 0; c < kBlockSize; c++) {
-          dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
-        }
-      }
-      result[batch * m_rows + row] += dot_prod;
-    }
-  }
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result) {
-  const int kBlockSize = 16;
-  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; ++batch) {
-    const int8_t* matrix_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      int32_t dot_prod = 0;
-      const int8_t* vector_in_batch = vector + batch * m_cols;
-      for (int i = segments[row]; i < segments[row + 1]; ++i) {
-        const int block_start_index = indices[i] * kBlockSize;
-        const int8_t* vector_block_in_batch_ptr =
-            vector_in_batch + block_start_index;
-        for (int c = 0; c < kBlockSize; c++) {
-          dot_prod += *matrix_ptr * *vector_block_in_batch_ptr++;
-          dot_prod += *matrix_ptr++ * input_offset;
-        }
-      }
-      const int32_t bias_value = bias_vector != nullptr ? bias_vector[row] : 0;
-      dot_prod = MultiplyByQuantizedMultiplier(dot_prod + bias_value,
-                                               output_multiplier, output_shift);
-      dot_prod += output_offset;
-      result[batch * m_rows + row] =
-          static_cast<int8_t>(ActivationFunctionWithMinMax(
-              dot_prod, output_activation_min, output_activation_max));
-    }
-  }
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result) {
-  const int kBlockSize = 16;
-  TFLITE_DCHECK_EQ(  // NOLINT
-      m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; batch++) {
-    const float* matrix_ptr = matrix;
-    const uint8_t* ledger_ptr = ledger;
-    for (int row = 0; row < m_rows; row++) {
-      float dot_prod = 0.0f;
-      int num_nonzero_blocks = *ledger_ptr++;
-      if (num_nonzero_blocks > 0) {
-        const float* vector_in_batch = vector + batch * m_cols;
-        for (int i = 0; i < num_nonzero_blocks; i++) {
-          const int block_start_index = *ledger_ptr++ * kBlockSize;
-          const float* vector_block_in_batch_ptr =
-              vector_in_batch + block_start_index;
-          for (int c = 0; c < kBlockSize; c++) {
-            dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
-          }
-        }
-      }
-      result[batch * m_rows + row] += dot_prod;
-    }
-  }
-}
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
-    const int m_cols, const int8_t* __restrict__ vectors,
-    const float* scaling_factors, int n_batch, float* __restrict__ result) {
-  static const int kBlockSize = 16;
-  TFLITE_DCHECK_EQ(  // NOLINT
-      m_cols % kBlockSize, 0);
-  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
-    const float batch_scaling_factor = scaling_factors[batch];
-    const uint8_t* ledger_ptr = ledger;
-    // Get the address of the first row.
-    const int8_t* row_ptr = matrix;
-    for (int row = 0; row < m_rows; ++row) {
-      // Initialize the dot product sum for the row to 0.
-      int32_t dotprod = 0;
-#if defined(__GNUC__)
-      // Prefetch the row to cache.
-      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
-                         3 /* temporal locality */);
-#endif
-      int num_nonzero_blocks = *ledger_ptr++;
-      for (int i = 0; i < num_nonzero_blocks; i++) {
-        const int block_start_index = *ledger_ptr++ * kBlockSize;
-        const int8_t* vector_block_ptr = vectors + block_start_index;
-        for (int c = 0; c < kBlockSize; c++) {
-          dotprod += (*row_ptr++) * (*vector_block_ptr++);
-        }  // for block
-      }    // for num_nonzero_blocks
-      result[batch * m_rows + row] += dotprod * batch_scaling_factor;
-    }  // for row
-  }    // for batch
-}
-
-template <typename T>
-void PortableMatrixBatchVectorMultiplyAccumulateImpl(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    T* output) {
-  const int16_t output_max = std::numeric_limits<T>::max();
-  const int16_t output_min = std::numeric_limits<T>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int row = 0; row < n_output; ++row) {
-      int32_t acc = bias[row];
-      for (int col = 0; col < n_input; ++col) {
-        int8_t input_val = input[batch * n_input + col];
-        int8_t weights_val = input_to_gate_weights[row * n_input + col];
-        acc += input_val * weights_val;
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc += output_zp;
-      acc += output[batch * n_output + row];
-      if (acc > output_max) {
-        acc = output_max;
-      }
-      if (acc < output_min) {
-        acc = output_min;
-      }
-      output[batch * n_output + row] = static_cast<T>(acc);
-    }
-  }
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulateImpl(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, output);
-}
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulateImpl(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, output);
-}
-
-void PortableMatrixBatchVectorMultiply(const int8_t* input,
-                                       int32_t input_zeropoint,
-                                       const int8_t* input_to_gate_weights,
-                                       int32_t input_to_gate_effective_scale_a,
-                                       int32_t input_to_gate_effective_scale_b,
-                                       int32_t n_batch, int32_t n_input,
-                                       int32_t n_cell, int8_t* gate_output,
-                                       int8_t gate_output_zp) {
-  const int32_t int8_max = std::numeric_limits<int8_t>::max();
-  const int32_t int8_min = std::numeric_limits<int8_t>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int row = 0; row < n_cell; ++row) {
-      int32_t acc = 0;
-      for (int col = 0; col < n_input; ++col) {
-        int32_t input_val = input[batch * n_input + col];
-        int8_t weights_val = input_to_gate_weights[row * n_input + col];
-        acc += (input_val - input_zeropoint) * weights_val;
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
-                                          input_to_gate_effective_scale_b);
-      acc += gate_output_zp;
-      if (acc > int8_max) {
-        acc = int8_max;
-      }
-      if (acc < int8_min) {
-        acc = int8_min;
-      }
-      gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-void PortableMatrixBatchVectorMultiply(
-    const int16_t* hidden, const int8_t* hidden_to_output_weights,
-    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
-    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
-    int32_t n_output, int32_t output_zp, int8_t* proj_output) {
-  const int16_t int8_max = std::numeric_limits<int8_t>::max();
-  const int16_t int8_min = std::numeric_limits<int8_t>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int row = 0; row < n_output; ++row) {
-      int64_t acc = gate_bias[row];
-      for (int col = 0; col < n_hidden; ++col) {
-        int16_t input_val = hidden[batch * n_hidden + col];
-        int8_t weights_val = hidden_to_output_weights[row * n_hidden + col];
-        int64_t curr = acc;
-        acc += input_val * weights_val;
-        if (input_val * weights_val > 0 && acc < curr) {
-          acc = std::numeric_limits<int32_t>::max();
-        }
-        if (input_val * weights_val < 0 && acc > curr) {
-          acc = std::numeric_limits<int32_t>::min();
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a,
-                                          proj_effective_scale_b);
-      acc += output_zp;
-      if (acc > int8_max) {
-        acc = int8_max;
-      }
-      if (acc < int8_min) {
-        acc = int8_min;
-      }
-      proj_output[batch * n_output + row] = acc;
-    }
-  }
-}
-
-void PortableApplyLayerNorm(const int16_t* input,
-                            const int16_t* layer_norm_weights,
-                            const int32_t* bias, int32_t layer_norm_scale_a,
-                            int32_t layer_norm_scale_b, int32_t variance_limit,
-                            int n_batch, int n_input, int16_t* output) {
-  // The square of std::pow(2, 10), which is the extra factor that makes sure
-  // normalized values have enough resolution.
-  static const int kTwoToPower20 = 1 << 20;
-  for (int i = 0; i < n_batch; ++i) {
-    int64_t sum = 0;
-    int64_t sum_sq = 0;
-    for (int j = 0; j < n_input; ++j) {
-      const int32_t index = i * n_input + j;
-      int32_t val = static_cast<int32_t>(input[index]);
-      sum += val;
-      sum_sq += val * val;
-    }
-    int32_t mean =
-        static_cast<int32_t>(static_cast<int64_t>(sum) * 1024 / n_input);
-    // TODO(b/173994730): Avoids overflow but only works for POT n_input.
-    int32_t temp = kTwoToPower20 / n_input;
-    int64_t variance =
-        sum_sq * temp - static_cast<int64_t>(mean) * static_cast<int64_t>(mean);
-    int32_t variance2 = static_cast<int32_t>(variance / kTwoToPower20);
-    if (variance2 < 1) {
-      variance2 = variance_limit;
-    }
-    int32_t stddev_inverse_a;
-    int stddev_inverse_b;
-    GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1,
-                                     &stddev_inverse_a, &stddev_inverse_b);
-
-    for (int j = 0; j < n_input; ++j) {
-      const int32_t index = i * n_input + j;
-      int32_t val = static_cast<int32_t>(input[index]);
-      int32_t shifted = 1024 * val - mean;
-      int32_t rescaled = MultiplyByQuantizedMultiplier(
-          shifted, stddev_inverse_a, stddev_inverse_b);
-      // TODO(jianlijianli): Saturate this.
-      int64_t val3 = rescaled * layer_norm_weights[j] + bias[j];
-      int32_t val4 =
-          static_cast<int32_t>((val3 > 0 ? val3 + 512 : val3 - 512) / 1024);
-      int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a,
-                                                   layer_norm_scale_b + 12);
-      val5 = std::min(std::max(kInt16Min, val5), kInt16Max);
-      output[index] = static_cast<int16_t>(val5);
-    }
-  }
-}
-
-void PortableApplyLayerNormFloat(const int16_t* input,
-                                 const int16_t* layer_norm_weights,
-                                 int32_t layer_norm_scale_a,
-                                 int32_t layer_norm_scale_b,
-                                 const int32_t* bias, int n_batch, int n_input,
-                                 int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  const float layer_norm_scale =
-      layer_norm_scale_a *
-      std::pow(2.0, static_cast<double>(layer_norm_scale_b - 31));
-  const float bias_scale =
-      static_cast<float>(std::pow(2.0, -10)) * layer_norm_scale;
-
-  for (int batch = 0; batch < n_batch; ++batch) {
-    float sum = 0.0f;
-    float sum_sq = 0.0f;
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float value = static_cast<float>(input[index]);
-      sum += value;
-      sum_sq += value * value;
-    }
-    const float mean = sum / n_input;
-    float stddev_inv = 0.0f;
-    const float variance = sum_sq / n_input - mean * mean;
-    if (variance == 0) {
-      stddev_inv = 1.0f / std::sqrt(1e-8f);
-    } else {
-      stddev_inv = 1.0f / std::sqrt(variance);
-    }
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float normalized_value =
-          (static_cast<float>(input[index]) - mean) * stddev_inv;
-      const float weighted_normalized_value =
-          normalized_value * layer_norm_weights[i] * layer_norm_scale +
-          bias[i] * bias_scale;
-      const int32_t quant_output = static_cast<int32_t>(round(
-          weighted_normalized_value * static_cast<float>(std::pow(2, 12))));
-      output[index] = std::min(int16_max, std::max(int16_min, quant_output));
-    }
-  }
-}
-
-void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
-                                            int32_t scalar, int32_t n_row,
-                                            int32_t n_col, int32_t* output) {
-  for (int i = 0; i < n_row; ++i) {
-    int32_t row_sum = 0;
-    for (int j = 0; j < n_col; ++j) {
-      row_sum += *matrix++;
-    }
-    output[i] += row_sum * scalar;
-  }
-}
-
-void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
-                          int32_t n_input, int16_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int c = 0; c < n_input; c++) {
-      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-      const int index = batch * n_input + c;
-      F3 sigmoid_input = F3::FromRaw(input[index]);
-      F0 sigmoid_output = gemmlowp::logistic(sigmoid_input);
-      output[index] = sigmoid_output.raw();
-    }
-  }
-}
-
-void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
-                               int32_t n_input, int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float float_input =
-          input[index] * static_cast<float>(std::pow(2, -12));
-      const float float_output = 1.0f / (1.0f + std::exp(-float_input));
-      const int32_t quant_output = static_cast<int32_t>(
-          float_output * static_cast<float>(std::pow(2, 15)));
-      const int32_t quant_output_clamped =
-          std::min(int16_max, std::max(int16_min, quant_output));
-      output[index] = static_cast<int16_t>(quant_output_clamped);
-    }
-  }
-}
-
-template <int IntegerBits>
-void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch,
-                           int32_t n_input, int16_t* output) {
-  using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
-  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      FX tanh_input = FX::FromRaw(input[index]);
-      F0 tanh_output = gemmlowp::tanh(tanh_input);
-      output[index] = tanh_output.raw();
-    }
-  }
-}
-
-void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
-                       int32_t n_batch, int32_t n_input, int16_t* output) {
-  assert(integer_bits <= 6);
-#define DISPATCH_TANH(i)                                       \
-  case i:                                                      \
-    PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
-    break;
-  switch (integer_bits) {
-    DISPATCH_TANH(0);
-    DISPATCH_TANH(1);
-    DISPATCH_TANH(2);
-    DISPATCH_TANH(3);
-    DISPATCH_TANH(4);
-    DISPATCH_TANH(5);
-    DISPATCH_TANH(6);
-    default:
-      return;
-  }
-#undef DISPATCH_TANH
-}
-
-void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
-                            int32_t n_input, int32_t integer_bits,
-                            int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  const double two = 2.0;
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const float float_input =
-          input[index] * std::pow(two, static_cast<double>(integer_bits));
-      const float float_output = std::tanh(float_input);
-      const int32_t quant_output = static_cast<int32_t>(
-          float_output * static_cast<float>(std::pow(2, 15)));
-      const int32_t quant_output_clamped =
-          std::min(int16_max, std::max(int16_min, quant_output));
-      output[index] = static_cast<int16_t>(quant_output_clamped);
-    }
-  }
-}
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int shift, int16_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const int16_t a = input_1[index];
-      const int16_t b = input_2[index];
-      const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
-      output[index] =
-          static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
-    }
-  }
-}
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int32_t multiplier, int32_t shift, int32_t n_batch,
-                      int32_t n_input, int32_t output_zp, int8_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      const int16_t a = input_1[index];
-      const int16_t b = input_2[index];
-      int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
-      value = MultiplyByQuantizedMultiplier(value, multiplier, shift);
-      value -= output_zp;
-      value = std::min(std::max(static_cast<int32_t>(-128), value),
-                       static_cast<int32_t>(127));
-
-      output[index] = static_cast<int8_t>(value);
-    }
-  }
-}
-
-void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int16_t* output) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    for (int i = 0; i < n_input; ++i) {
-      const int index = batch * n_input + i;
-      int32_t sum = input_1[index] + input_2[index];
-      const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
-      output[index] = static_cast<int16_t>(sum_clamped);
-    }
-  }
-}
-
-float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
-                                     int v_size) {
-  float result = 0.0;
-  for (int v = 0; v < v_size; v++) {
-    result += *vector1++ * *vector2++;
-  }
-  return result;
-}
-
-namespace {
-inline int32_t VectorVectorDotProduct(const int16_t* vector1,
-                                      const int16_t* vector2, int v_size) {
-  int32_t result = 0;
-  for (int v = 0; v < v_size; v++) {
-    result += *vector1++ * *vector2++;
-  }
-  return result;
-}
-}  // namespace
-
-void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                              const int16_t* vector2,
-                                              int v_size, int n_batch,
-                                              int32_t* result) {
-  for (int b = 0; b < n_batch; b++) {
-    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
-    vector1 += v_size;
-    vector2 += v_size;
-  }
-}
-
-void PortableVectorBatchVectorCwiseProductAccumulate(
-    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
-    int32_t multiplier, int shift, int16_t* result) {
-  for (int b = 0; b < n_batch; b++) {
-    for (int v = 0; v < v_size; v++) {
-      int32_t prod = vector[v] * *batch_vector++;
-      prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift);
-      int32_t output = prod + *result;
-      output = std::max(std::min(static_cast<int32_t>(32767), output),
-                        static_cast<int32_t>(-32768));
-      *result++ = output;
-    }
-  }
-}
-
-void PortableSub1Vector(const float* vector, int v_size, float* result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ = 1.0f - *vector++;
-  }
-}
-
-void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) {
-  static const int16_t kOne = 32767;
-  for (int v = 0; v < v_size; v++) {
-    *result++ = kOne - *vector++;
-  }
-}
-
-void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
-                                  const float scale, float* result) {
-  for (int v = 0; v < v_size; ++v) {
-    *result++ = scale * *vector++;
-  }
-}
-
-void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
-                                     float* __restrict__ output_vector,
-                                     int v_size, int n_batch) {
-  for (int batch = 0; batch < n_batch; ++batch) {
-    float sum = 0.0f;
-    for (int i = 0; i < v_size; ++i) {
-      sum += input_vector[i];
-    }
-    const float mean = sum / v_size;
-    float sum_diff_sq = 0.0f;
-    for (int i = 0; i < v_size; ++i) {
-      const float diff = input_vector[i] - mean;
-      sum_diff_sq += diff * diff;
-    }
-    const float variance = sum_diff_sq / v_size;
-    constexpr float kNormalizationConstant = 1e-8f;
-    const float stddev_inv =
-        1.0f / std::sqrt(variance + kNormalizationConstant);
-    for (int i = 0; i < v_size; ++i) {
-      output_vector[i] = (input_vector[i] - mean) * stddev_inv;
-    }
-    input_vector += v_size;
-    output_vector += v_size;
-  }
-}
-
-void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                                  const int8_t* recurrent, int8_t recurrent_zp,
-                                  int32_t input_effective_scale_a,
-                                  int32_t input_effective_scale_b,
-                                  int32_t recurrent_effective_scale_a,
-                                  int32_t recurrent_effective_scale_b,
-                                  int32_t n_batch, int32_t n_cell,
-                                  int16_t* output) {
-  const int32_t int16_max = std::numeric_limits<int16_t>::max();
-  const int32_t int16_min = std::numeric_limits<int16_t>::min();
-  for (int i = 0; i < n_batch * n_cell; ++i) {
-    int32_t x = static_cast<int32_t>(input[i]) - static_cast<int32_t>(input_zp);
-    int32_t h =
-        static_cast<int32_t>(recurrent[i]) - static_cast<int32_t>(recurrent_zp);
-    int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a,
-                                                     input_effective_scale_b);
-    int32_t h_scaled = MultiplyByQuantizedMultiplier(
-        h, recurrent_effective_scale_a, recurrent_effective_scale_b);
-    int32_t y = h_scaled + x_scaled;
-    if (y > int16_max) {
-      y = int16_max;
-    }
-    if (y < int16_min) {
-      y = int16_min;
-    }
-    output[i] = static_cast<int16_t>(y);
-  }
-}
-
-}  // namespace tensor_utils
-}  // namespace tflite
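
For readers skimming the removed reference kernels above: `PortableSymmetricQuantizeFloats` maps floats to int8 by taking `scale = max(|min|, |max|) / 127` and `q = clamp(round(v / scale), -127, 127)`. A minimal standalone sketch of that same math follows; the example values and the `main` wrapper are illustrative only and not part of the removed file.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Same arithmetic as the removed PortableSymmetricQuantizeFloats, shown standalone.
  const std::vector<float> values = {-0.8f, 0.1f, 0.5f, 1.2f};
  const auto minmax = std::minmax_element(values.begin(), values.end());
  const float range = std::max(std::abs(*minmax.first), std::abs(*minmax.second));
  const float scale = range / 127.0f;  // the reported scaling_factor
  for (const float v : values) {
    const int32_t q = static_cast<int32_t>(std::round(v / scale));
    const int8_t q8 = static_cast<int8_t>(std::min(127, std::max(-127, q)));
    std::printf("%+.2f -> %4d (dequantized %+.3f)\n", v, q8, q8 * scale);
  }
  return 0;
}
```

With symmetric scaling the rounded value can only leave [-127, 127] through floating-point error, which is why the removed code clamps as a final step.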

+ 0 - 333
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h

@@ -1,333 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
-
-#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-namespace tensor_utils {
-
-// Check if all entries of a vector are zero for float.
-bool IsZeroVector(const float* vector, int v_size) {
-  return PortableIsZeroVector(vector, v_size);
-}
-
-// Check if all entries of a vector are zero for int8_t.
-bool IsZeroVector(const int8_t* vector, int v_size) {
-  return PortableIsZeroVector(vector, v_size);
-}
-
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float* min, float* max,
-                             float* scaling_factor) {
-  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
-                                  scaling_factor);
-}
-
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float min_value,
-                             float max_value, float* scaling_factor) {
-  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
-                                  max_value, scaling_factor);
-}
-
-void AsymmetricQuantizeFloats(const float* values, const int size,
-                              int8_t* quantized_values, float* scaling_factor,
-                              int32_t* offset) {
-  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
-                                   scaling_factor, offset);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
-                                         int m_cols, const float* vector,
-                                         int n_batch, float* result) {
-  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
-                                              n_batch, result);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
-                                         const int m_rows, const int m_cols,
-                                         const int8_t* __restrict__ vector,
-                                         const float* scaling_factors,
-                                         int n_batch,
-                                         float* __restrict__ result) {
-  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
-                                              scaling_factors, n_batch, result);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result, const float* per_channel_scale,
-    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
-    bool* compute_row_sums, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(
-      matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
-      per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
-      context);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
-                                         const int m_rows, const int m_cols,
-                                         const int8_t* __restrict__ vector,
-                                         const float* scaling_factors,
-                                         int n_batch, int32_t* scratch,
-                                         float* __restrict__ result,
-                                         CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
-                                              scaling_factors, n_batch, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
-      matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate(
-      matrix, ledger, m_rows, m_cols, vector, n_batch, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
-      matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
-      input_offset, output_multiplier, output_shift, output_offset,
-      output_activation_min, output_activation_max, result);
-}
-
-void SparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
-    const int m_cols, const int8_t* __restrict__ vectors,
-    const float* scaling_factors, int n_batch, float* __restrict__ result) {
-  PortableSparseMatrixBatchVectorMultiplyAccumulate(
-      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
-      result);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, scratch, output, context);
-}
-
-void MatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
-  PortableMatrixBatchVectorMultiplyAccumulate(
-      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
-      n_output, output_zp, scratch, output, context);
-}
-
-void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
-                                    int32_t n_row, int32_t n_col,
-                                    int32_t* output) {
-  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
-}
-
-void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
-                               const int8_t* input_to_gate_weights,
-                               int32_t input_to_gate_effective_scale_a,
-                               int32_t input_to_gate_effective_scale_b,
-                               int32_t n_batch, int32_t n_input, int32_t n_cell,
-                               int8_t* gate_output, int8_t gate_output_zp) {
-  PortableMatrixBatchVectorMultiply(
-      input, input_zeropoint, input_to_gate_weights,
-      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
-      n_input, n_cell, gate_output, gate_output_zp);
-}
-
-void MatrixBatchVectorMultiply(const int16_t* hidden,
-                               const int8_t* hidden_to_output_weights,
-                               int32_t proj_effective_scale_a,
-                               int32_t proj_effective_scale_b,
-                               const int32_t* gate_bias, int32_t n_batch,
-                               int32_t n_hidden, int32_t n_output,
-                               int32_t output_zp, int8_t* proj_output) {
-  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
-                                    proj_effective_scale_a,
-                                    proj_effective_scale_b, gate_bias, n_batch,
-                                    n_hidden, n_output, output_zp, proj_output);
-}
-
-void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
-                    const int32_t* bias, int32_t layer_norm_scale_a,
-                    int32_t layer_norm_scale_b, int32_t variance_limit,
-                    int n_batch, int n_input, int16_t* output) {
-  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
-                         layer_norm_scale_b, variance_limit, n_batch, n_input,
-                         output);
-}
-
-void ApplyLayerNormFloat(const int16_t* input,
-                         const int16_t* layer_norm_weights,
-                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
-                         const int32_t* bias, int n_batch, int n_input,
-                         int16_t* output) {
-  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
-                              layer_norm_scale_b, bias, n_batch, n_input,
-                              output);
-}
-
-void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
-                  int16_t* output) {
-  PortableApplySigmoid(input, n_batch, n_input, output);
-}
-
-void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                       int16_t* output) {
-  PortableApplySigmoidFloat(input, n_batch, n_input, output);
-}
-
-void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
-               int32_t n_input, int16_t* output) {
-  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
-}
-
-void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
-                    int32_t integer_bits, int16_t* output) {
-  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
-}
-
-void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int shift, int16_t* output) {
-  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
-}
-
-void CwiseMul(const int16_t* input_1, const int16_t* input_2,
-              int32_t multiplier, int32_t shift, int32_t n_batch,
-              int32_t n_input, int32_t output_zp, int8_t* output) {
-  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
-                   output_zp, output);
-}
-
-void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
-              int n_input, int16_t* output) {
-  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
-}
-
-void CwiseClipping(float* vector, const int v_size,
-                   const float clipping_value) {
-  PortableCwiseClipping(vector, v_size, clipping_value);
-}
-
-void CwiseClipping(int16_t* vector, const int v_size,
-                   const int16_t clipping_value) {
-  PortableCwiseClipping(vector, v_size, clipping_value);
-}
-
-void CwiseClipping(int8_t* vector, const int v_size,
-                   const int8_t clipping_value) {
-  PortableCwiseClipping(vector, v_size, clipping_value);
-}
-
-void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
-                                             const int16_t* batch_vector,
-                                             int n_batch, int32_t multiplier,
-                                             int shift, int16_t* result) {
-  PortableVectorBatchVectorCwiseProductAccumulate(
-      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
-}
-
-float VectorVectorDotProduct(const float* vector1, const float* vector2,
-                             int v_size) {
-  return PortableVectorVectorDotProduct(vector1, vector2, v_size);
-}
-
-void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                      const int16_t* vector2, int v_size,
-                                      int n_batch, int32_t* result) {
-  PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
-                                           result);
-}
-
-void Sub1Vector(const float* vector, int v_size, float* result) {
-  PortableSub1Vector(vector, v_size, result);
-}
-
-void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
-  PortableSub1Vector(vector, v_size, result);
-}
-
-// Multiply all elements of the vector by a scalar.
-void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                          float* result) {
-  PortableVectorScalarMultiply(vector, v_size, scale, result);
-}
-
-void ReductionSumVector(const float* input_vector, float* output_vector,
-                        int output_size, int reduction_size) {
-  PortableReductionSumVector(input_vector, output_vector, output_size,
-                             reduction_size);
-}
-
-void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size) {
-  PortableReductionSumVector(input_vector, output_vector, output_size,
-                             reduction_size);
-}
-
-void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size) {
-  PortableReductionSumVector(input_vector, output_vector, output_size,
-                             reduction_size);
-}
-
-void MeanStddevNormalization(const float* input_vector, float* output_vector,
-                             int v_size, int n_batch) {
-  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
-}
-
-void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                          const int8_t* recurrent, int8_t recurrent_zp,
-                          int32_t input_effective_scale_a,
-                          int32_t input_effective_scale_b,
-                          int32_t recurrent_effective_scale_a,
-                          int32_t recurrent_effective_scale_b, int32_t n_batch,
-                          int32_t n_cell, int16_t* output) {
-  PortableTwoGateSaturatingAdd(
-      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
-      input_effective_scale_b, recurrent_effective_scale_a,
-      recurrent_effective_scale_b, n_batch, n_cell, output);
-}
-
-}  // namespace tensor_utils
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
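
The header above is the portable dispatch layer: each `tensor_utils::` entry point forwards verbatim to its `Portable*` counterpart, so optimized backends can provide their own implementations behind the same names. The most heavily used contract is `MatrixBatchVectorMultiplyAccumulate`: for every batch `b` and matrix row `r`, `result[b * m_rows + r] += dot(row r, vector b)`. The sketch below restates that contract in a self-contained form; the function name, sizes, and values are illustrative and not the TFLite API itself.

```cpp
#include <cstdio>
#include <vector>

// Illustrative restatement of the accumulate contract; not the TFLite symbol.
void MatVecAccumulate(const float* matrix, int m_rows, int m_cols,
                      const float* vectors, int n_batch, float* result) {
  for (int b = 0; b < n_batch; ++b) {
    for (int r = 0; r < m_rows; ++r) {
      float dot = 0.0f;
      for (int c = 0; c < m_cols; ++c) {
        dot += matrix[r * m_cols + c] * vectors[b * m_cols + c];
      }
      result[b * m_rows + r] += dot;  // accumulates into result, never overwrites
    }
  }
}

int main() {
  const std::vector<float> matrix = {1, 2, 3, 4};   // 2x2 matrix, row-major
  const std::vector<float> vectors = {1, 1, 2, 0};  // two batch vectors of size 2
  std::vector<float> result(4, 0.0f);               // n_batch * m_rows outputs
  MatVecAccumulate(matrix.data(), 2, 2, vectors.data(), 2, result.data());
  for (const float r : result) std::printf("%.1f ", r);  // prints: 3.0 7.0 2.0 6.0
  std::printf("\n");
  return 0;
}
```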

+ 0 - 244
code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h

@@ -1,244 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
-
-#include <algorithm>
-#include <cstdint>
-
-#if defined(_MSC_VER)
-#define __restrict__ __restrict
-#endif
-
-namespace tflite {
-
-// Not all backends support CpuBackendContext usage, so forward declare to avoid
-// pulling in its implementation.
-class CpuBackendContext;
-
-namespace tensor_utils {
-
-template <typename T>
-bool PortableIsZeroVector(const T* vector, int v_size) {
-  for (int i = 0; i < v_size; ++i) {
-    if (vector[i] != 0) {
-      return false;
-    }
-  }
-  return true;
-}
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float* min_value,
-                                     float* max_value, float* scaling_factor);
-
-void PortableSymmetricQuantizeFloats(const float* values, const int size,
-                                     int8_t* quantized_values, float min_value,
-                                     float max_value, float* scaling_factor);
-
-void PortableAsymmetricQuantizeFloats(const float* values, const int size,
-                                      int8_t* quantized_values,
-                                      float* scaling_factor, int32_t* offset);
-
-// Multiply a matrix by a batch vector, and store results in a batch-size
-// vector.
-void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
-                                                 int m_rows, int m_cols,
-                                                 const float* vector,
-                                                 int n_batch, float* result);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vectors, const float* scaling_factors,
-    int n_batch, float* __restrict__ result, const float* per_channel_scale,
-    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
-    bool* compute_row_sums, CpuBackendContext* context);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
-    const int8_t* __restrict__ vector, const float* scaling_factors,
-    int n_batch, int32_t* scratch, float* __restrict__ result,
-    CpuBackendContext* context);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
-    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const float* __restrict__ vector, int n_batch, float* __restrict__ result);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
-    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
-    float* __restrict__ result);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
-    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
-    const int32_t* __restrict__ indices, int m_rows, int m_cols,
-    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
-    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
-    const int32_t output_shift, const int32_t output_offset,
-    const int32_t output_activation_min, const int32_t output_activation_max,
-    int8_t* __restrict__ result);
-
-void PortableSparseMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
-    const int m_cols, const int8_t* __restrict__ vectors,
-    const float* scaling_factors, int n_batch, float* __restrict__ result);
-
-// Dot product of two vectors.
-float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
-                                     int v_size);
-
-void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
-                                              const int16_t* vector2,
-                                              int v_size, int n_batch,
-                                              int32_t* result);
-
-void PortableVectorBatchVectorCwiseProductAccumulate(
-    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
-    int32_t multiplier, int shift, int16_t* result);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int16_t* output, CpuBackendContext* context);
-
-void PortableMatrixBatchVectorMultiplyAccumulate(
-    const int8_t* input, const int32_t* bias,
-    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
-    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
-    int32_t* scratch, int8_t* output, CpuBackendContext* context);
-
-void PortableMatrixBatchVectorMultiply(const int8_t* input,
-                                       int32_t input_zeropoint,
-                                       const int8_t* input_to_gate_weights,
-                                       int32_t input_to_gate_effective_scale_a,
-                                       int32_t input_to_gate_effective_scale_b,
-                                       int32_t n_batch, int32_t n_input,
-                                       int32_t n_cell, int8_t* gate_output,
-                                       int8_t gate_output_zp);
-
-void PortableMatrixBatchVectorMultiply(
-    const int16_t* hidden, const int8_t* hidden_to_output_weights,
-    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
-    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
-    int32_t n_output, int32_t output_zp, int8_t* proj_output);
-
-void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
-                                            int32_t scalar, int32_t n_row,
-                                            int32_t n_col, int32_t* output);
-
-void PortableApplyLayerNorm(const int16_t* input,
-                            const int16_t* layer_norm_weights,
-                            const int32_t* bias, int32_t layer_norm_scale_a,
-                            int32_t layer_norm_scale_b, int32_t variance_limit,
-                            int n_batch, int n_input, int16_t* output);
-
-void PortableApplyLayerNormFloat(const int16_t* input,
-                                 const int16_t* layer_norm_weights,
-                                 int32_t layer_norm_scale_a,
-                                 int32_t layer_norm_scale_b,
-                                 const int32_t* bias, int n_batch, int n_input,
-                                 int16_t* output);
-
-void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
-                          int32_t n_input, int16_t* output);
-
-void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
-                               int32_t n_input, int16_t* output);
-
-void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
-                       int32_t n_batch, int32_t n_input, int16_t* output);
-
-void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
-                            int32_t n_input, int32_t integer_bits,
-                            int16_t* output);
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int shift, int16_t* output);
-
-void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
-                      int32_t multiplier, int32_t shift, int32_t n_batch,
-                      int32_t n_input, int32_t output_zp, int8_t* output);
-
-void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
-                      int n_batch, int n_input, int16_t* output);
-
-template <typename T>
-void PortableCwiseClipping(T* vector, const int v_size,
-                           const T& clipping_value) {
-  for (int i = 0; i < v_size; i++) {
-    vector[i] = std::max(std::min(clipping_value, vector[i]),
-                         static_cast<T>(-clipping_value));
-  }
-}
-
-// Batch vector initialization with another vector.
-void PortableVectorBatchVectorAssign(const float* vector, int v_size,
-                                     int n_batch, float* batch_vector);
-
-// Compute "1.0f - elements of vector" (used in CIFG).
-void PortableSub1Vector(const float* vector, int v_size, float* result);
-
-void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);
-
-// Multiply all elements of the vector by a scalar.
-void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                                  float* result);
-
-// Reduce-sum on a vector:
-// input_vector: pointer to input vector.
-// output_vector: pointer to output vector.
-// output_size: output vector size.
-// reduction_size: number of consecutive elements from input vector which are
-// added to get one element of output.
-template <typename INPUT, typename OUTPUT>
-void PortableReductionSumVector(const INPUT* input_vector,
-                                OUTPUT* output_vector, int output_size,
-                                int reduction_size) {
-  for (int o = 0; o < output_size; o++) {
-    OUTPUT result = 0;
-    for (int r = 0; r < reduction_size; r++) {
-      result += input_vector[r];
-    }
-    output_vector[o] = result;
-    input_vector += reduction_size;
-  }
-}
-
-// Layer norm for each batch.
-void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
-                                     float* __restrict__ output_vector,
-                                     int v_size, int n_batch);
-
-// Saturate Add.
-void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
-                                  const int8_t* recurrent, int8_t recurrent_zp,
-                                  int32_t input_effective_scale_a,
-                                  int32_t input_effective_scale_b,
-                                  int32_t recurrent_effective_scale_a,
-                                  int32_t recurrent_effective_scale_b,
-                                  int32_t n_batch, int32_t n_cell,
-                                  int16_t* output);
-
-}  // namespace tensor_utils
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
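
Among the templates removed above, `PortableReductionSumVector` documents its contract inline: each output element is the sum of `reduction_size` consecutive input elements, which is how the quantized matmul path above builds per-row sums of a row-major int8 matrix. A standalone sketch of that contract follows; the template mirrors the removed one, while the data and `main` wrapper are illustrative only.

```cpp
#include <cstdint>
#include <cstdio>

// Same shape of template as the removed PortableReductionSumVector.
template <typename INPUT, typename OUTPUT>
void ReductionSum(const INPUT* input, OUTPUT* output, int output_size,
                  int reduction_size) {
  for (int o = 0; o < output_size; ++o) {
    OUTPUT acc = 0;
    for (int r = 0; r < reduction_size; ++r) {
      acc += input[r];
    }
    output[o] = acc;
    input += reduction_size;  // advance to the next group of inputs
  }
}

int main() {
  const int8_t matrix[] = {1, 2, 3, 4, 5, 6};  // 2 rows x 3 cols, row-major
  int32_t row_sums[2] = {0, 0};
  ReductionSum(matrix, row_sums, /*output_size=*/2, /*reduction_size=*/3);
  std::printf("%d %d\n", row_sums[0], row_sums[1]);  // prints: 6 15
  return 0;
}
```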

Some files are not shown because too many files changed in this diff