/* ----------------------------------------------------------------------
 * Project: TinyEngine
 * Title: fp_requantize_op.h
 *
 * Reference papers:
 * - MCUNet: Tiny Deep Learning on IoT Device, NeurIPS 2020
 * - MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning, NeurIPS 2021
 * - MCUNetV3: On-Device Training Under 256KB Memory, NeurIPS 2022
 * Contact authors:
 * - Wei-Ming Chen, wmchen@mit.edu
 * - Wei-Chen Wang, wweichen@mit.edu
 * - Ji Lin, jilin@mit.edu
 * - Ligeng Zhu, ligeng@mit.edu
 * - Song Han, songhan@mit.edu
 *
 * Target ISA: ARMv7E-M
 * -------------------------------------------------------------------- */
  18. #ifndef TINYENGINE_INCLUDE_FP_REQUANTIZE_OP_H_
  19. #define TINYENGINE_INCLUDE_FP_REQUANTIZE_OP_H_
  20. tinyengine_status convolve_1x1_s8_ch8_fpreq(const q7_t *input,
  21. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  22. const q7_t *kernel, const int32_t *bias, const float *scales,
  23. const int32_t out_offset, const int32_t input_offset,
  24. const int32_t out_activation_min, const int32_t out_activation_max,
  25. q7_t *output, const uint16_t output_x, const uint16_t output_y,
  26. const uint16_t output_ch, q15_t *runtime_buf);
  27. tinyengine_status convolve_1x1_s8_ch16_fpreq(const q7_t *input,
  28. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  29. const q7_t *kernel, const int32_t *bias, const float *scales,
  30. const int32_t out_offset, const int32_t input_offset,
  31. const int32_t out_activation_min, const int32_t out_activation_max,
  32. q7_t *output, const uint16_t output_x, const uint16_t output_y,
  33. const uint16_t output_ch, q15_t *runtime_buf);
  34. tinyengine_status convolve_1x1_s8_ch24_fpreq(const q7_t *input,
  35. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  36. const q7_t *kernel, const int32_t *bias, const float *scales,
  37. const int32_t out_offset, const int32_t input_offset,
  38. const int32_t out_activation_min, const int32_t out_activation_max,
  39. q7_t *output, const uint16_t output_x, const uint16_t output_y,
  40. const uint16_t output_ch, q15_t *runtime_buf);
  41. tinyengine_status convolve_1x1_s8_ch48_fpreq(const q7_t *input,
  42. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  43. const q7_t *kernel, const int32_t *bias, const float *scales,
  44. const int32_t out_offset, const int32_t input_offset,
  45. const int32_t out_activation_min, const int32_t out_activation_max,
  46. q7_t *output, const uint16_t output_x, const uint16_t output_y,
  47. const uint16_t output_ch, q15_t *runtime_buf);
  48. tinyengine_status convolve_1x1_s8_fpreq(const q7_t *input,
  49. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  50. const q7_t *kernel, const int32_t *bias, const float *scales,
  51. const int32_t out_offset, const int32_t input_offset,
  52. const int32_t out_activation_min, const int32_t out_activation_max,
  53. q7_t *output, const uint16_t output_x, const uint16_t output_y,
  54. const uint16_t output_ch, q15_t *runtime_buf);
  55. tinyengine_status convolve_1x1_s8_fpreq_bitmask(const q7_t *input,
  56. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  57. const q7_t *kernel, const int32_t *bias, const float *scales,
  58. const int32_t out_offset, const int32_t input_offset,
  59. const int32_t out_activation_min, const int32_t out_activation_max,
  60. q7_t *output, q7_t *mask, const uint16_t output_x, const uint16_t output_y,
  61. const uint16_t output_ch, q15_t *runtime_buf);
  62. tinyengine_status convolve_1x1_s8_fpreq_bitmask_partialCH(const q7_t *input,
  63. const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch,
  64. const q7_t *kernel_sram, const q7_t *kernel_flash, const uint16_t first_k_channel, const int32_t *bias, const float *scales,
  65. const int32_t out_offset, const int32_t input_offset,
  66. const int32_t out_activation_min, const int32_t out_activation_max,
  67. q7_t *output, q7_t *mask, const uint16_t output_x, const uint16_t output_y,
  68. const uint16_t output_ch, q15_t *runtime_buf);
  69. q7_t* mat_mult_kernel_s8_s16_reordered_fpreq(const q7_t *input_a,
  70. const q15_t *input_b, const uint16_t output_ch, const float *scales,
  71. const int32_t out_offset, const int16_t activation_min,
  72. const int16_t activation_max, const uint16_t num_col_a,
  73. const int32_t *const output_bias, q7_t *out_0);
  74. q7_t* mat_mult_kernel_s8_s16_reordered_ch8_fpreq(const q7_t *input_a,
  75. const q15_t *input_b, const uint16_t output_ch, const float *scales,
  76. const int32_t out_offset, const int16_t activation_min,
  77. const int16_t activation_max, const uint16_t num_col_a,
  78. const int32_t *const output_bias, q7_t *out_0);
  79. q7_t* mat_mult_kernel_s8_s16_reordered_ch16_fpreq(const q7_t *input_a,
  80. const q15_t *input_b, const uint16_t output_ch, const float *scales,
  81. const int32_t out_offset, const int16_t activation_min,
  82. const int16_t activation_max, const uint16_t num_col_a,
  83. const int32_t *const output_bias, q7_t *out_0);
  84. q7_t* mat_mult_kernel_s8_s16_reordered_ch24_fpreq(const q7_t *input_a,
  85. const q15_t *input_b, const uint16_t output_ch, const float *scales,
  86. const int32_t out_offset, const int16_t activation_min,
  87. const int16_t activation_max, const uint16_t num_col_a,
  88. const int32_t *const output_bias, q7_t *out_0);
  89. q7_t* mat_mult_kernel_s8_s16_reordered_ch48_fpreq(const q7_t *input_a,
  90. const q15_t *input_b, const uint16_t output_ch, const float *scales,
  91. const int32_t out_offset, const int16_t activation_min,
  92. const int16_t activation_max, const uint16_t num_col_a,
  93. const int32_t *const output_bias, q7_t *out_0);
  94. #endif /* TINYENGINE_INCLUDE_FP_REQUANTIZE_OP_H_ */