arm_nnsupportfunctions.h 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /*
  2. * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_nnsupportfunctions.h
  21. * Description: Public header file of support functions for CMSIS NN Library
  22. *
  23. * $Date: 13. July 2018
  24. * $Revision: V.1.0.0
  25. *
  26. * Target Processor: Cortex-M cores
  27. * -------------------------------------------------------------------- */
  28. #ifndef _ARM_NNSUPPORTFUNCTIONS_H_
  29. #define _ARM_NNSUPPORTFUNCTIONS_H_
  30. #include "arm_math.h"
  31. #include "arm_common_tables.h"
  32. #ifdef __cplusplus
  33. extern "C"
  34. {
  35. #endif
  36. #define LEFT_SHIFT(_shift) (_shift > 0 ? _shift : 0)
  37. #define RIGHT_SHIFT(_shift) (_shift > 0 ? 0 : -_shift)
  38. #define Q31_MIN (0x80000000L)
  39. #define Q31_MAX (0x7FFFFFFFL)
/**
 * @brief Union for SIMD access of Q31/Q15/Q7 types
 *
 * All members share the same storage, so one q31_t word can also be
 * viewed as two q15_t half-words or four q7_t bytes.
 * NOTE(review): which half-word/byte index maps to which bits of `word`
 * depends on the target byte order - confirm before relying on it.
 */
union arm_nnword
{
    q31_t word;
    /**< Q31 type */
    q15_t half_words[2];
    /**< Q15 type */
    q7_t bytes[4];
    /**< Q7 type */
};
/**
 * @brief Enum for specifying activation function types
 *
 */
typedef enum
{
    ARM_SIGMOID = 0,
    /**< Sigmoid activation function */
    ARM_TANH = 1,
    /**< Tanh activation function */
} arm_nn_activation_type;
  63. /**
  64. * @defgroup nndata_convert Neural Network Data Conversion Functions
  65. *
  66. * Perform data type conversion in-between neural network operations
  67. *
  68. */
/**
 * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
 * @param[in] *pSrc points to the Q7 input vector
 * @param[out] *pDst points to the Q15 output vector
 * @param[in] blockSize length of the input vector
 * @return none.
 *
 * NOTE(review): pDst presumably needs room for blockSize q15_t elements;
 * the implementation is not visible in this header - confirm.
 */
void arm_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
/**
 * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
 * @param[in] *pSrc points to the Q7 input vector
 * @param[out] *pDst points to the Q15 output vector
 * @param[in] blockSize length of the input vector
 * @return none.
 *
 * NOTE(review): the exact element order of the "reordered" output is
 * defined by the implementation, which is not visible in this header;
 * presumably it matches the layout produced by read_and_pad_reordered()
 * - confirm.
 */
void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
  87. #if defined (ARM_MATH_DSP)
  88. /**
  89. * @brief read and expand one Q7 word into two Q15 words
  90. */
  91. __STATIC_FORCEINLINE void *read_and_pad(void *source, q31_t * out1, q31_t * out2)
  92. {
  93. q31_t inA = *__SIMD32(source)++;
  94. q31_t inAbuf1 = __SXTB16(__ROR(inA, 8));
  95. q31_t inAbuf2 = __SXTB16(inA);
  96. #ifndef ARM_MATH_BIG_ENDIAN
  97. *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
  98. *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
  99. #else
  100. *out1 = __PKHTB(inAbuf1, inAbuf2, 16);
  101. *out2 = __PKHBT(inAbuf2, inAbuf1, 16);
  102. #endif
  103. return source;
  104. }
  105. /**
  106. * @brief read and expand one Q7 word into two Q15 words with reordering
  107. */
  108. __STATIC_FORCEINLINE void *read_and_pad_reordered(void *source, q31_t * out1, q31_t * out2)
  109. {
  110. q31_t inA = *__SIMD32(source)++;
  111. #ifndef ARM_MATH_BIG_ENDIAN
  112. *out2 = __SXTB16(__ROR(inA, 8));
  113. *out1 = __SXTB16(inA);
  114. #else
  115. *out1 = __SXTB16(__ROR(inA, 8));
  116. *out2 = __SXTB16(inA);
  117. #endif
  118. return source;
  119. }
  120. #endif
  121. /**
  122. * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
  123. *
  124. * Basic Math Functions for Neural Network Computation
  125. *
  126. */
/**
 * @brief Q15 vector multiplication with variable output shifts
 * @param[in] *pSrcA pointer to the first input vector
 * @param[in] *pSrcB pointer to the second input vector
 * @param[out] *pDst pointer to the output vector
 * @param[in] out_shift amount of right-shift for output
 * @param[in] blockSize number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
 */
void arm_nn_mult_q15(
q15_t * pSrcA,
q15_t * pSrcB,
q15_t * pDst,
const uint16_t out_shift,
uint32_t blockSize);
/**
 * @brief Q7 vector multiplication with variable output shifts
 * @param[in] *pSrcA pointer to the first input vector
 * @param[in] *pSrcB pointer to the second input vector
 * @param[out] *pDst pointer to the output vector
 * @param[in] out_shift amount of right-shift for output
 * @param[in] blockSize number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
 *
 * NOTE(review): presumably the element-wise products are right-shifted by
 * out_shift before saturation; the implementation is not visible here - confirm.
 */
void arm_nn_mult_q7(
q7_t * pSrcA,
q7_t * pSrcB,
q7_t * pDst,
const uint16_t out_shift,
uint32_t blockSize);
  167. /**
  168. * @brief macro for adding rounding offset
  169. */
  170. #ifndef ARM_NN_TRUNCATE
  171. #define NN_ROUND(out_shift) ( (0x1u << out_shift) >> 1 )
  172. #else
  173. #define NN_ROUND(out_shift) 0
  174. #endif
  175. /**
  176. * @brief Saturating doubling high multiply. Result matches
  177. * NEON instruction VQRDMULH.
  178. * @param[in] m1 Multiplicand
  179. * @param[in] m2 Multiplier
  180. * @return Result of multiplication.
  181. *
  182. */
  183. __STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
  184. {
  185. q31_t result = 0;
  186. // Rounding offset to add for a right shift of 31
  187. q63_t mult = 1 << 30;
  188. if ((m1 < 0) ^ (m2 < 0))
  189. {
  190. mult = 1 - mult;
  191. }
  192. // Gets resolved as a SMLAL instruction
  193. mult = mult + (q63_t)m1 * m2;
  194. // Utilize all of the upper 32 bits. This is the doubling step
  195. // as well.
  196. result = mult / (1UL << 31);
  197. if ((m1 == m2) && (m1 == Q31_MIN))
  198. {
  199. result = Q31_MAX;
  200. }
  201. return result;
  202. }
  203. /**
  204. * @brief Rounding divide by power of two.
  205. * @param[in] dividend - Dividend
  206. * @param[in] exponent - Divisor = power(2, exponent)
  207. * Range: [0, 31]
  208. * @return Rounded result of division. Midpoint is rounded away from zero.
  209. *
  210. */
  211. __STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
  212. {
  213. q31_t result = 0;
  214. const q31_t remainder_mask = (1l << exponent) - 1;
  215. int32_t remainder = remainder_mask & dividend;
  216. // Basic division
  217. result = dividend >> exponent;
  218. // Adjust 'result' for rounding (mid point away from zero)
  219. q31_t threshold = remainder_mask >> 1;
  220. if (result < 0)
  221. {
  222. threshold++;
  223. }
  224. if (remainder > threshold)
  225. {
  226. result++;
  227. }
  228. return result;
  229. }
  230. #ifdef __cplusplus
  231. }
  232. #endif
  233. #endif