convolution_test.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. // Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <stdint.h>
  15. #include <stdbool.h>
  16. #include <stdio.h>
  17. #include <stdlib.h>
  18. #include <malloc.h>
  19. #include <esp_nn.h>
  20. #include "test_utils.h"
  21. #if CONFIG_IDF_CMAKE
  22. #if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
  23. #define IDF_HEAP_CAPS 1
  24. #endif
  25. #if IDF_HEAP_CAPS
  26. #include "esp_heap_caps.h"
  27. #endif
  28. #endif
  29. void esp_nn_depthwise_conv_s8_test()
  30. {
  31. int8_t *input = NULL, *filter_data = NULL, *out_data_c = NULL, *out_data_opt = NULL;
  32. int32_t *bias = NULL;
  33. int32_t input_offset = 5; /* some number in [-128, 127] */
  34. int32_t out_offset = 7;
  35. int32_t activation_min = -125;
  36. int32_t activation_max = 120;
  37. void *scratch_buf = NULL;
  38. /* independent variables */
  39. int input_wd, input_ht, channels;
  40. uint16_t filter_ht, filter_wd, ch_mult;
  41. uint16_t pad_wd, pad_ht, stride_wd, stride_ht;
  42. // run for 15 iterations
  43. for (int itr = 0; itr < 15; itr++) {
  44. /* prepare data */
  45. switch (itr) {
  46. case 0: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0)
  47. input_wd = 18;
  48. input_ht = 18;
  49. filter_ht = 3;
  50. filter_wd = 3;
  51. ch_mult = 1;
  52. channels = 16;
  53. pad_wd = 0;
  54. pad_ht = 0;
  55. stride_wd = 1;
  56. stride_ht = 1;
  57. break;
  58. case 1: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (1,1)
  59. input_wd = 10;
  60. input_ht = 10;
  61. filter_ht = 3;
  62. filter_wd = 3;
  63. ch_mult = 1;
  64. channels = 16;
  65. pad_wd = 1;
  66. pad_ht = 1;
  67. stride_wd = 1;
  68. stride_ht = 1;
  69. break;
  70. case 2: // (ch_mult 1, (channels % 8) = 0), filter (3,3), pad (1,1)
  71. input_wd = 10;
  72. input_ht = 10;
  73. filter_ht = 3;
  74. filter_wd = 3;
  75. ch_mult = 1;
  76. channels = 24;
  77. pad_wd = 1;
  78. pad_ht = 1;
  79. stride_wd = 1;
  80. stride_ht = 1;
  81. break;
  82. case 3: // other filter sizes (ch_mult 1, (channels % 8) = 0)
  83. input_wd = 10;
  84. input_ht = 10;
  85. filter_ht = 3;
  86. filter_wd = 3;
  87. ch_mult = 1;
  88. channels = 24;
  89. pad_wd = 1;
  90. pad_ht = 1;
  91. stride_wd = 1;
  92. stride_ht = 1;
  93. break;
  94. case 4: // other filter sizes (ch_mult 8 = 0)
  95. input_wd = 6;
  96. input_ht = 6;
  97. filter_ht = 3;
  98. filter_wd = 3;
  99. ch_mult = 8;
  100. channels = 4;
  101. pad_wd = 1;
  102. pad_ht = 1;
  103. stride_wd = 1;
  104. stride_ht = 1;
  105. break;
  106. case 5: // other filter sizes (ch_mult 8 = 0)
  107. input_wd = 12;
  108. input_ht = 12;
  109. filter_ht = 5;
  110. filter_wd = 5;
  111. ch_mult = 8;
  112. channels = 4;
  113. pad_wd = 1;
  114. pad_ht = 1;
  115. stride_wd = 1;
  116. stride_ht = 1;
  117. break;
  118. case 6: // other filter sizes (ch_mult 4 = 0)
  119. input_wd = 6;
  120. input_ht = 6;
  121. filter_ht = 3;
  122. filter_wd = 3;
  123. ch_mult = 4;
  124. channels = 4;
  125. pad_wd = 1;
  126. pad_ht = 1;
  127. stride_wd = 1;
  128. stride_ht = 1;
  129. break;
  130. case 7: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
  131. input_wd = 6;
  132. input_ht = 6;
  133. filter_ht = 3;
  134. filter_wd = 3;
  135. ch_mult = 1;
  136. channels = 16;
  137. pad_wd = 0;
  138. pad_ht = 0;
  139. stride_wd = 2;
  140. stride_ht = 2;
  141. break;
  142. case 8: // same as case 7, with large parameters
  143. input_wd = 58;
  144. input_ht = 58;
  145. filter_ht = 3;
  146. filter_wd = 3;
  147. ch_mult = 1;
  148. channels = 128;
  149. pad_wd = 0;
  150. pad_ht = 0;
  151. stride_wd = 2;
  152. stride_ht = 2;
  153. break;
  154. case 9: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
  155. input_wd = 6;
  156. input_ht = 6;
  157. filter_ht = 3;
  158. filter_wd = 3;
  159. ch_mult = 1;
  160. channels = 16;
  161. pad_wd = 0;
  162. pad_ht = 0;
  163. stride_wd = 2;
  164. stride_ht = 2;
  165. break;
  166. default:
  167. input_wd = 6;
  168. input_ht = 6;
  169. filter_ht = 3;
  170. filter_wd = 3;
  171. ch_mult = 1;
  172. channels = 16;
  173. stride_wd = rand() % 2 + 1;
  174. stride_ht = stride_wd;
  175. pad_wd = stride_wd == 1 ? 0 : rand() % 2;
  176. pad_ht = pad_wd;
  177. printf("stride(%d), pad (%d)\t", stride_wd, pad_wd);
  178. break;
  179. }
  180. uint16_t out_wd = (input_wd - filter_wd + 1) / stride_wd;
  181. uint16_t out_ht = (input_ht - filter_ht + 1) / stride_ht;
  182. if (itr == 9) {
  183. // expect the function to handle this gracefully
  184. out_wd += 1;
  185. out_ht += 1;
  186. }
  187. int in_size = input_wd * input_ht * channels;
  188. int out_size = out_wd * out_ht * channels * ch_mult;
  189. int filter_size = filter_wd * filter_ht * channels * ch_mult + 4;
  190. int bias_size = channels * ch_mult + 1;
  191. int32_t out_shift[channels * ch_mult];
  192. int32_t out_mult[channels * ch_mult];
  193. #if IDF_HEAP_CAPS
  194. int8_t *input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  195. int8_t *out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  196. int8_t *out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  197. filter_data = (int8_t *) heap_caps_malloc(filter_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  198. bias = (int32_t *) heap_caps_malloc(bias_size * 4, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  199. input = 16 + input_orig - ((uint32_t) input_orig & 0xf);
  200. out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
  201. out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
  202. #else
  203. input = memalign(16, in_size + 16);
  204. filter_data = memalign(16, filter_size);
  205. out_data_c = memalign(16, out_size + 16);
  206. out_data_opt = memalign(16, out_size + 16);
  207. bias = memalign(16, bias_size * 4);
  208. int8_t *input_orig = input;
  209. int8_t *out_c_orig = out_data_c;
  210. int8_t *out_opt_orig = out_data_opt;
  211. #endif
  212. if (bias == NULL || input == NULL || filter_data == NULL ||
  213. out_data_c == NULL || out_data_opt == NULL || bias == NULL) {
  214. printf(ANSI_COLOR_RED"%s[%d] allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
  215. goto dc_s8_cleanup;
  216. }
  217. /* Generate input data */
  218. for (int i = 0; i < in_size; ++i) {
  219. input[i] = rand() % 128;
  220. }
  221. /* Generate filter data */
  222. for (int i = 0; i < filter_size; ++i) {
  223. filter_data[i] = rand() % 256 - 128;
  224. }
  225. /* Generate bias data */
  226. for (int i = 0; i < channels * ch_mult; ++i) {
  227. bias[i + 1] = rand() % INT16_MAX; //0th index left for unalignment
  228. out_shift[i] = -8 + rand() % 3;
  229. out_mult[i] = 0x7eb0e200 + rand() % 50;
  230. }
  231. data_dims_t input_dims = {.width = input_wd, .height = input_ht, .channels = channels, 1};
  232. data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = channels * ch_mult, 1};
  233. data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
  234. dw_conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset, .ch_mult = ch_mult,
  235. .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
  236. .dilation = {0, 0}, .activation = {activation_min, activation_max}};
  237. quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
  238. int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(&input_dims, &filter_dims,
  239. &output_dims, &conv_params);
  240. if (scratch_buf_size > 0) {
  241. #if IDF_HEAP_CAPS
  242. scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  243. int align_sz = 16 - (((int32_t) scratch_buf) & 0xf);
  244. #else
  245. scratch_buf = memalign(16, scratch_buf_size);
  246. int align_sz = 0;
  247. #endif
  248. if (scratch_buf == NULL) {
  249. printf(ANSI_COLOR_RED"%s[%d] scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET,
  250. __FUNCTION__, itr, scratch_buf_size);
  251. goto dc_s8_cleanup;
  252. }
  253. esp_nn_set_depthwise_conv_scratch_buf(scratch_buf + align_sz);
  254. }
  255. if (itr == 0) {
  256. /* enable profiler */
  257. profile_c_start();
  258. }
  259. /* C function */
  260. esp_nn_depthwise_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 4,
  261. bias + 1, &output_dims, out_data_c, &conv_params, &quant_data);
  262. if (itr == 0) {
  263. profile_c_end();
  264. profile_opt_start();
  265. }
  266. /* Optimized function */
  267. esp_nn_depthwise_conv_s8(&input_dims, input, &filter_dims, filter_data + 4,
  268. bias + 1, &output_dims, out_data_opt, &conv_params, &quant_data);
  269. if (itr == 0) {
  270. /* disable profiler */
  271. profile_opt_end();
  272. }
  273. bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size);
  274. if (ret == false) {
  275. printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
  276. printf("Output: \n");
  277. PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht);
  278. printf("Expected: \n");
  279. PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht);
  280. printf("Input:\n");
  281. PRINT_ARRAY_HEX(input, in_size / input_ht, input_ht);
  282. printf("Filter data:\n");
  283. PRINT_ARRAY_HEX(filter_data + 4, (filter_size - 4) / filter_ht, filter_ht);
  284. printf("bias data:\n");
  285. PRINT_ARRAY_INT(bias + 1, ch_mult * channels, 1);
  286. goto dc_s8_cleanup;
  287. }
  288. printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
  289. dc_s8_cleanup:
  290. if (input) {
  291. free(input_orig);
  292. }
  293. if (filter_data) {
  294. free(filter_data);
  295. }
  296. if (out_data_c) {
  297. free(out_c_orig);
  298. }
  299. if (out_data_opt) {
  300. free(out_opt_orig);
  301. }
  302. if (bias) {
  303. free(bias);
  304. }
  305. if (scratch_buf) {
  306. free(scratch_buf);
  307. }
  308. }
  309. }
  310. void esp_nn_conv_s8_test()
  311. {
  312. const int32_t input_offset = 5; /* some number in [-128, 127] */
  313. const int32_t activation_min = -125;
  314. const int32_t activation_max = 122;
  315. const int32_t out_offset = 3;
  316. void *scratch_buf = NULL;
  317. int8_t *input_orig;
  318. int8_t *out_c_orig;
  319. int8_t *out_opt_orig;
  320. int8_t *filter_data;
  321. int32_t *bias;
  322. /* independent variable */
  323. int in_wd, in_ht, in_channels, out_channels;
  324. uint16_t filter_ht, filter_wd;
  325. uint16_t pad_wd, pad_ht, stride_wd, stride_ht;
  326. // run for 10 iterations
  327. for (int itr = 0; itr < 10; itr++) {
  328. switch (itr) {
  329. case 0: // ch % 8 == 0 && filter (1,1), padding (0,0)
  330. in_wd = 10;
  331. in_ht = 10;
  332. in_channels = 64;
  333. out_channels = 64;
  334. filter_ht = 1;
  335. filter_wd = 1;
  336. pad_wd = 0;
  337. pad_ht = 0;
  338. stride_wd = 1;
  339. stride_ht = 1;
  340. break;
  341. case 1: // ch % 4 == 0 && (in_wd * in_ht) % 16 == 0
  342. in_wd = 4;
  343. in_ht = 4;
  344. in_channels = 20;
  345. out_channels = 8;
  346. filter_ht = 1;
  347. filter_wd = 1;
  348. pad_wd = 0;
  349. pad_ht = 0;
  350. stride_wd = 1;
  351. stride_ht = 1;
  352. break;
  353. case 2: // ch, filter (3x3x3)
  354. in_wd = 10;
  355. in_ht = 10;
  356. in_channels = 3;
  357. out_channels = 64;
  358. filter_ht = 3;
  359. filter_wd = 3;
  360. pad_wd = 0;
  361. pad_ht = 0;
  362. stride_wd = 1;
  363. stride_ht = 1;
  364. break;
  365. case 3: // remaining pad (0, 0)
  366. in_wd = 10;
  367. in_ht = 10;
  368. in_channels = 3;
  369. out_channels = 64;
  370. filter_ht = 1;
  371. filter_wd = 1;
  372. pad_wd = 0;
  373. pad_ht = 0;
  374. stride_wd = 1;
  375. stride_ht = 1;
  376. break;
  377. case 4: // unopt case
  378. in_wd = 10;
  379. in_ht = 10;
  380. in_channels = 12;
  381. out_channels = 64;
  382. filter_ht = 3;
  383. filter_wd = 3;
  384. pad_wd = 1;
  385. pad_ht = 1;
  386. stride_wd = 1;
  387. stride_ht = 1;
  388. break;
  389. case 5: // ch % 8 == 0 & stride (2,2)
  390. in_wd = 16;
  391. in_ht = 16;
  392. in_channels = 16;
  393. out_channels = 16;
  394. filter_ht = 1;
  395. filter_wd = 1;
  396. pad_wd = 0;
  397. pad_ht = 0;
  398. stride_wd = 2;
  399. stride_ht = 2;
  400. break;
  401. case 6: // ch % 8 == 0 && filter (1,1), padding (0,0)
  402. in_wd = 2;
  403. in_ht = 2;
  404. in_channels = 8;
  405. out_channels = 8;
  406. filter_ht = 1;
  407. filter_wd = 1;
  408. pad_wd = 0;
  409. pad_ht = 0;
  410. stride_wd = 1;
  411. stride_ht = 1;
  412. break;
  413. default: // ch % 8 == 0
  414. in_wd = 8;
  415. in_ht = 8;
  416. in_channels = 16;
  417. out_channels = 16;
  418. filter_ht = 1;
  419. filter_wd = 1;
  420. pad_wd = 0;
  421. pad_ht = 0;
  422. stride_wd = 1;
  423. stride_ht = 1;
  424. break;
  425. }
  426. /* prepare data */
  427. uint16_t out_wd = (in_wd - filter_wd + 1) / stride_wd;
  428. uint16_t out_ht = (in_ht - filter_ht + 1) / stride_ht;
  429. int in_size = in_wd * in_ht * in_channels;
  430. int filter_size = filter_wd * filter_ht * in_channels * out_channels + 2;
  431. int out_size = out_wd * out_ht * out_channels;
  432. #if IDF_HEAP_CAPS
  433. input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  434. out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  435. out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  436. filter_data = (int8_t *) heap_caps_malloc(filter_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  437. bias = (int32_t *) heap_caps_malloc(128 + sizeof (int32_t) * out_channels, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  438. int8_t *input = 16 + input_orig - ((uint32_t) input_orig & 0xf);
  439. int8_t *out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
  440. int8_t *out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
  441. #else
  442. int8_t *input = memalign(16, in_size);
  443. int8_t *out_data_c = memalign(16, out_size);
  444. int8_t *out_data_opt = memalign(16, out_size);
  445. filter_data = memalign(16, filter_size);
  446. bias = calloc(1, 128 + sizeof (int32_t) * out_channels);
  447. input_orig = input;
  448. out_c_orig = out_data_c;
  449. out_opt_orig = out_data_opt;
  450. #endif
  451. int32_t *out_shift = calloc(1, 128 + sizeof (int32_t) * out_channels);
  452. int32_t *out_mult = calloc(1, 128 + sizeof (int32_t) * out_channels);
  453. if (input == NULL || filter_data == NULL ||
  454. out_data_c == NULL || out_data_opt == NULL) {
  455. printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
  456. goto conv_s8_cleanup;
  457. }
  458. if (bias == NULL || out_shift == NULL || out_mult == NULL) {
  459. printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
  460. goto conv_s8_cleanup;
  461. }
  462. /* Generate input data between -128 -> +127 */
  463. for (int i = 0; i < in_size; ++i) {
  464. input[i] = rand() % 255 - 128;
  465. }
  466. /* Generate filter data between -128 -> +127 */
  467. for (int i = 0; i < filter_size; ++i) {
  468. filter_data[i] = rand() % 256 - 128;
  469. }
  470. /* Generate bias data */
  471. for (int i = 0; i < out_channels; ++i) {
  472. bias[i] = (int32_t)rand() % UINT16_MAX + UINT8_MAX;
  473. }
  474. /* Shift and multiplier */
  475. for (int i = 0; i < out_channels; ++i) {
  476. out_shift[i] = -10 + rand() % 2;
  477. out_mult[i] = 0x7f67f4f8 + rand() % 50;
  478. }
  479. data_dims_t input_dims = {.width = in_wd, .height = in_ht, .channels = in_channels, 1};
  480. data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = out_channels, 1};
  481. data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
  482. conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset,
  483. .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
  484. .dilation = {0, 0}, .activation = {activation_min, activation_max}};
  485. quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
  486. int scratch_buf_size = esp_nn_get_conv_scratch_size(&input_dims, &filter_dims,
  487. &output_dims, &conv_params);
  488. if (scratch_buf_size > 0) {
  489. #if IDF_HEAP_CAPS
  490. void *scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
  491. int align_sz = 16 - (((int32_t) scratch_buf) & 0xf);
  492. #else
  493. void *scratch_buf = memalign(16, scratch_buf_size);
  494. int align_sz = 0;
  495. #endif
  496. if (scratch_buf == NULL) {
  497. printf(ANSI_COLOR_RED"%s scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET, __FUNCTION__, scratch_buf_size);
  498. goto conv_s8_cleanup;
  499. }
  500. esp_nn_set_conv_scratch_buf(scratch_buf + align_sz);
  501. }
  502. if (itr == 0) {
  503. /* enable profiler */
  504. profile_c_start();
  505. }
  506. /* C function */
  507. esp_nn_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 2,
  508. bias, &output_dims, out_data_c, &conv_params, &quant_data);
  509. if (itr == 0) {
  510. profile_c_end();
  511. profile_opt_start();
  512. }
  513. /* Optimized function */
  514. esp_nn_conv_s8(&input_dims, input, &filter_dims, filter_data + 2,
  515. bias, &output_dims, out_data_opt, &conv_params, &quant_data);
  516. if (itr == 0) {
  517. /* disable profiler */
  518. profile_opt_end();
  519. }
  520. bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size);
  521. if (ret == false) {
  522. printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
  523. printf("Output: \n");
  524. PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht);
  525. printf("Expected: \n");
  526. PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht);
  527. printf("Input:\n");
  528. PRINT_ARRAY_HEX(input, in_size / in_ht, in_ht);
  529. printf("Filter data:\n");
  530. PRINT_ARRAY_HEX(filter_data + 2, (filter_size - 2) / filter_ht, filter_ht);
  531. printf("bias data:\n");
  532. PRINT_ARRAY_INT(bias, out_channels, 1);
  533. goto conv_s8_cleanup;
  534. }
  535. printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
  536. conv_s8_cleanup:
  537. if (input) {
  538. free(input_orig);
  539. }
  540. if (filter_data) {
  541. free(filter_data);
  542. }
  543. if (out_data_c) {
  544. free(out_c_orig);
  545. }
  546. if (out_data_opt) {
  547. free(out_opt_orig);
  548. }
  549. if (bias) {
  550. free(bias);
  551. }
  552. if (out_shift) {
  553. free(out_shift);
  554. }
  555. if (out_mult) {
  556. free(out_mult);
  557. }
  558. if (scratch_buf) {
  559. free(scratch_buf);
  560. }
  561. }
  562. }