| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605 |
- // Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #include <stdint.h>
- #include <stdbool.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <malloc.h>
- #include <esp_nn.h>
- #include "test_utils.h"
- #if CONFIG_IDF_CMAKE
- #if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
- #define IDF_HEAP_CAPS 1
- #endif
- #if IDF_HEAP_CAPS
- #include "esp_heap_caps.h"
- #endif
- #endif
- void esp_nn_depthwise_conv_s8_test()
- {
- int8_t *input = NULL, *filter_data = NULL, *out_data_c = NULL, *out_data_opt = NULL;
- int32_t *bias = NULL;
- int32_t input_offset = 5; /* some number in [-128, 127] */
- int32_t out_offset = 7;
- int32_t activation_min = -125;
- int32_t activation_max = 120;
- void *scratch_buf = NULL;
- /* independent variables */
- int input_wd, input_ht, channels;
- uint16_t filter_ht, filter_wd, ch_mult;
- uint16_t pad_wd, pad_ht, stride_wd, stride_ht;
- // run for 15 iterations
- for (int itr = 0; itr < 15; itr++) {
- /* prepare data */
- switch (itr) {
- case 0: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0)
- input_wd = 18;
- input_ht = 18;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 1: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (1,1)
- input_wd = 10;
- input_ht = 10;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 2: // (ch_mult 1, (channels % 8) = 0), filter (3,3), pad (1,1)
- input_wd = 10;
- input_ht = 10;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 24;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 3: // other filter sizes (ch_mult 1, (channels % 8) = 0)
- input_wd = 10;
- input_ht = 10;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 24;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 4: // other filter sizes (ch_mult 8 = 0)
- input_wd = 6;
- input_ht = 6;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 8;
- channels = 4;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 5: // other filter sizes (ch_mult 8 = 0)
- input_wd = 12;
- input_ht = 12;
- filter_ht = 5;
- filter_wd = 5;
- ch_mult = 8;
- channels = 4;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 6: // other filter sizes (ch_mult 4 = 0)
- input_wd = 6;
- input_ht = 6;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 4;
- channels = 4;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 7: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
- input_wd = 6;
- input_ht = 6;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 2;
- stride_ht = 2;
- break;
- case 8: // same as case 7, with large parameters
- input_wd = 58;
- input_ht = 58;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 128;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 2;
- stride_ht = 2;
- break;
- case 9: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
- input_wd = 6;
- input_ht = 6;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 2;
- stride_ht = 2;
- break;
- default:
- input_wd = 6;
- input_ht = 6;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- stride_wd = rand() % 2 + 1;
- stride_ht = stride_wd;
- pad_wd = stride_wd == 1 ? 0 : rand() % 2;
- pad_ht = pad_wd;
- printf("stride(%d), pad (%d)\t", stride_wd, pad_wd);
- break;
- }
- uint16_t out_wd = (input_wd - filter_wd + 1) / stride_wd;
- uint16_t out_ht = (input_ht - filter_ht + 1) / stride_ht;
- if (itr == 9) {
- // expect the function to handle this gracefully
- out_wd += 1;
- out_ht += 1;
- }
- int in_size = input_wd * input_ht * channels;
- int out_size = out_wd * out_ht * channels * ch_mult;
- int filter_size = filter_wd * filter_ht * channels * ch_mult + 4;
- int bias_size = channels * ch_mult + 1;
- int32_t out_shift[channels * ch_mult];
- int32_t out_mult[channels * ch_mult];
- #if IDF_HEAP_CAPS
- int8_t *input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- int8_t *out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- int8_t *out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- filter_data = (int8_t *) heap_caps_malloc(filter_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- bias = (int32_t *) heap_caps_malloc(bias_size * 4, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- input = 16 + input_orig - ((uint32_t) input_orig & 0xf);
- out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
- out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
- #else
- input = memalign(16, in_size + 16);
- filter_data = memalign(16, filter_size);
- out_data_c = memalign(16, out_size + 16);
- out_data_opt = memalign(16, out_size + 16);
- bias = memalign(16, bias_size * 4);
- int8_t *input_orig = input;
- int8_t *out_c_orig = out_data_c;
- int8_t *out_opt_orig = out_data_opt;
- #endif
- if (bias == NULL || input == NULL || filter_data == NULL ||
- out_data_c == NULL || out_data_opt == NULL || bias == NULL) {
- printf(ANSI_COLOR_RED"%s[%d] allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
- goto dc_s8_cleanup;
- }
- /* Generate input data */
- for (int i = 0; i < in_size; ++i) {
- input[i] = rand() % 128;
- }
- /* Generate filter data */
- for (int i = 0; i < filter_size; ++i) {
- filter_data[i] = rand() % 256 - 128;
- }
- /* Generate bias data */
- for (int i = 0; i < channels * ch_mult; ++i) {
- bias[i + 1] = rand() % INT16_MAX; //0th index left for unalignment
- out_shift[i] = -8 + rand() % 3;
- out_mult[i] = 0x7eb0e200 + rand() % 50;
- }
- data_dims_t input_dims = {.width = input_wd, .height = input_ht, .channels = channels, 1};
- data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = channels * ch_mult, 1};
- data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
- dw_conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset, .ch_mult = ch_mult,
- .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
- .dilation = {0, 0}, .activation = {activation_min, activation_max}};
- quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
- int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(&input_dims, &filter_dims,
- &output_dims, &conv_params);
- if (scratch_buf_size > 0) {
- #if IDF_HEAP_CAPS
- scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- int align_sz = 16 - (((int32_t) scratch_buf) & 0xf);
- #else
- scratch_buf = memalign(16, scratch_buf_size);
- int align_sz = 0;
- #endif
- if (scratch_buf == NULL) {
- printf(ANSI_COLOR_RED"%s[%d] scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET,
- __FUNCTION__, itr, scratch_buf_size);
- goto dc_s8_cleanup;
- }
- esp_nn_set_depthwise_conv_scratch_buf(scratch_buf + align_sz);
- }
- if (itr == 0) {
- /* enable profiler */
- profile_c_start();
- }
- /* C function */
- esp_nn_depthwise_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 4,
- bias + 1, &output_dims, out_data_c, &conv_params, &quant_data);
- if (itr == 0) {
- profile_c_end();
- profile_opt_start();
- }
- /* Optimized function */
- esp_nn_depthwise_conv_s8(&input_dims, input, &filter_dims, filter_data + 4,
- bias + 1, &output_dims, out_data_opt, &conv_params, &quant_data);
- if (itr == 0) {
- /* disable profiler */
- profile_opt_end();
- }
- bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size);
- if (ret == false) {
- printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
- printf("Output: \n");
- PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht);
- printf("Expected: \n");
- PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht);
- printf("Input:\n");
- PRINT_ARRAY_HEX(input, in_size / input_ht, input_ht);
- printf("Filter data:\n");
- PRINT_ARRAY_HEX(filter_data + 4, (filter_size - 4) / filter_ht, filter_ht);
- printf("bias data:\n");
- PRINT_ARRAY_INT(bias + 1, ch_mult * channels, 1);
- goto dc_s8_cleanup;
- }
- printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
- dc_s8_cleanup:
- if (input) {
- free(input_orig);
- }
- if (filter_data) {
- free(filter_data);
- }
- if (out_data_c) {
- free(out_c_orig);
- }
- if (out_data_opt) {
- free(out_opt_orig);
- }
- if (bias) {
- free(bias);
- }
- if (scratch_buf) {
- free(scratch_buf);
- }
- }
- }
- void esp_nn_conv_s8_test()
- {
- const int32_t input_offset = 5; /* some number in [-128, 127] */
- const int32_t activation_min = -125;
- const int32_t activation_max = 122;
- const int32_t out_offset = 3;
- void *scratch_buf = NULL;
- int8_t *input_orig;
- int8_t *out_c_orig;
- int8_t *out_opt_orig;
- int8_t *filter_data;
- int32_t *bias;
- /* independent variable */
- int in_wd, in_ht, in_channels, out_channels;
- uint16_t filter_ht, filter_wd;
- uint16_t pad_wd, pad_ht, stride_wd, stride_ht;
- // run for 10 iterations
- for (int itr = 0; itr < 10; itr++) {
- switch (itr) {
- case 0: // ch % 8 == 0 && filter (1,1), padding (0,0)
- in_wd = 10;
- in_ht = 10;
- in_channels = 64;
- out_channels = 64;
- filter_ht = 1;
- filter_wd = 1;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 1: // ch % 4 == 0 && (in_wd * in_ht) % 16 == 0
- in_wd = 4;
- in_ht = 4;
- in_channels = 20;
- out_channels = 8;
- filter_ht = 1;
- filter_wd = 1;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 2: // ch, filter (3x3x3)
- in_wd = 10;
- in_ht = 10;
- in_channels = 3;
- out_channels = 64;
- filter_ht = 3;
- filter_wd = 3;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 3: // remaining pad (0, 0)
- in_wd = 10;
- in_ht = 10;
- in_channels = 3;
- out_channels = 64;
- filter_ht = 1;
- filter_wd = 1;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 4: // unopt case
- in_wd = 10;
- in_ht = 10;
- in_channels = 12;
- out_channels = 64;
- filter_ht = 3;
- filter_wd = 3;
- pad_wd = 1;
- pad_ht = 1;
- stride_wd = 1;
- stride_ht = 1;
- break;
- case 5: // ch % 8 == 0 & stride (2,2)
- in_wd = 16;
- in_ht = 16;
- in_channels = 16;
- out_channels = 16;
- filter_ht = 1;
- filter_wd = 1;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 2;
- stride_ht = 2;
- break;
- case 6: // ch % 8 == 0 && filter (1,1), padding (0,0)
- in_wd = 2;
- in_ht = 2;
- in_channels = 8;
- out_channels = 8;
- filter_ht = 1;
- filter_wd = 1;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- default: // ch % 8 == 0
- in_wd = 8;
- in_ht = 8;
- in_channels = 16;
- out_channels = 16;
- filter_ht = 1;
- filter_wd = 1;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 1;
- stride_ht = 1;
- break;
- }
- /* prepare data */
- uint16_t out_wd = (in_wd - filter_wd + 1) / stride_wd;
- uint16_t out_ht = (in_ht - filter_ht + 1) / stride_ht;
- int in_size = in_wd * in_ht * in_channels;
- int filter_size = filter_wd * filter_ht * in_channels * out_channels + 2;
- int out_size = out_wd * out_ht * out_channels;
- #if IDF_HEAP_CAPS
- input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- filter_data = (int8_t *) heap_caps_malloc(filter_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- bias = (int32_t *) heap_caps_malloc(128 + sizeof (int32_t) * out_channels, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- int8_t *input = 16 + input_orig - ((uint32_t) input_orig & 0xf);
- int8_t *out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
- int8_t *out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
- #else
- int8_t *input = memalign(16, in_size);
- int8_t *out_data_c = memalign(16, out_size);
- int8_t *out_data_opt = memalign(16, out_size);
- filter_data = memalign(16, filter_size);
- bias = calloc(1, 128 + sizeof (int32_t) * out_channels);
- input_orig = input;
- out_c_orig = out_data_c;
- out_opt_orig = out_data_opt;
- #endif
- int32_t *out_shift = calloc(1, 128 + sizeof (int32_t) * out_channels);
- int32_t *out_mult = calloc(1, 128 + sizeof (int32_t) * out_channels);
- if (input == NULL || filter_data == NULL ||
- out_data_c == NULL || out_data_opt == NULL) {
- printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
- goto conv_s8_cleanup;
- }
- if (bias == NULL || out_shift == NULL || out_mult == NULL) {
- printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
- goto conv_s8_cleanup;
- }
- /* Generate input data between -128 -> +127 */
- for (int i = 0; i < in_size; ++i) {
- input[i] = rand() % 255 - 128;
- }
- /* Generate filter data between -128 -> +127 */
- for (int i = 0; i < filter_size; ++i) {
- filter_data[i] = rand() % 256 - 128;
- }
- /* Generate bias data */
- for (int i = 0; i < out_channels; ++i) {
- bias[i] = (int32_t)rand() % UINT16_MAX + UINT8_MAX;
- }
- /* Shift and multiplier */
- for (int i = 0; i < out_channels; ++i) {
- out_shift[i] = -10 + rand() % 2;
- out_mult[i] = 0x7f67f4f8 + rand() % 50;
- }
- data_dims_t input_dims = {.width = in_wd, .height = in_ht, .channels = in_channels, 1};
- data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = out_channels, 1};
- data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
- conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset,
- .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
- .dilation = {0, 0}, .activation = {activation_min, activation_max}};
- quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
- int scratch_buf_size = esp_nn_get_conv_scratch_size(&input_dims, &filter_dims,
- &output_dims, &conv_params);
- if (scratch_buf_size > 0) {
- #if IDF_HEAP_CAPS
- void *scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
- int align_sz = 16 - (((int32_t) scratch_buf) & 0xf);
- #else
- void *scratch_buf = memalign(16, scratch_buf_size);
- int align_sz = 0;
- #endif
- if (scratch_buf == NULL) {
- printf(ANSI_COLOR_RED"%s scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET, __FUNCTION__, scratch_buf_size);
- goto conv_s8_cleanup;
- }
- esp_nn_set_conv_scratch_buf(scratch_buf + align_sz);
- }
- if (itr == 0) {
- /* enable profiler */
- profile_c_start();
- }
- /* C function */
- esp_nn_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 2,
- bias, &output_dims, out_data_c, &conv_params, &quant_data);
- if (itr == 0) {
- profile_c_end();
- profile_opt_start();
- }
- /* Optimized function */
- esp_nn_conv_s8(&input_dims, input, &filter_dims, filter_data + 2,
- bias, &output_dims, out_data_opt, &conv_params, &quant_data);
- if (itr == 0) {
- /* disable profiler */
- profile_opt_end();
- }
- bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size);
- if (ret == false) {
- printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
- printf("Output: \n");
- PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht);
- printf("Expected: \n");
- PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht);
- printf("Input:\n");
- PRINT_ARRAY_HEX(input, in_size / in_ht, in_ht);
- printf("Filter data:\n");
- PRINT_ARRAY_HEX(filter_data + 2, (filter_size - 2) / filter_ht, filter_ht);
- printf("bias data:\n");
- PRINT_ARRAY_INT(bias, out_channels, 1);
- goto conv_s8_cleanup;
- }
- printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
- conv_s8_cleanup:
- if (input) {
- free(input_orig);
- }
- if (filter_data) {
- free(filter_data);
- }
- if (out_data_c) {
- free(out_c_orig);
- }
- if (out_data_opt) {
- free(out_opt_orig);
- }
- if (bias) {
- free(bias);
- }
- if (out_shift) {
- free(out_shift);
- }
- if (out_mult) {
- free(out_mult);
- }
- if (scratch_buf) {
- free(scratch_buf);
- }
- }
- }
|