Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

233 lines
8.7KB

  1. /* Audio Library for Teensy 3.X
  2. * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
  3. *
  4. * Development of this audio library was funded by PJRC.COM, LLC by sales of
  5. * Teensy and Audio Adaptor boards. Please support PJRC's efforts to develop
  6. * open source software by purchasing Teensy or other PJRC products.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice, development funding notice, and this permission
  16. * notice shall be included in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #ifndef dspinst_h_
  27. #define dspinst_h_
  28. #include <stdint.h>
// computes limit((val >> rshift), 2**bits)
// Arithmetic right shift of val by rshift, then saturate to a signed
// "bits"-wide range (-2^(bits-1) .. 2^(bits-1)-1) using the ARM SSAT
// instruction.  Both bits and rshift use the "I" asm constraint, so they
// must be compile-time constants (SSAT only takes immediate operands).
static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
{
int32_t out;
asm volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
return out;
}
// computes ((a[31:0] * b[15:0]) >> 16)
// SMULWB: signed 32x16 multiply of a by the LOW halfword of b, returning
// the top 32 bits of the 48-bit product (i.e. product >> 16).
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
{
int32_t out;
asm volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes ((a[31:0] * b[31:16]) >> 16)
// SMULWT: signed 32x16 multiply of a by the HIGH halfword of b, returning
// the top 32 bits of the 48-bit product (i.e. product >> 16).
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
{
int32_t out;
asm volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (((int64_t)a[31:0] * (int64_t)b[31:0]) >> 32)
// SMMUL: signed 32x32 multiply, keeping only the most significant 32 bits
// of the 64-bit product (truncated, no rounding — see the _rounded
// variant below for the rounding form).
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
{
int32_t out;
asm volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
// SMMULR: signed 32x32 multiply, rounded high half.  The instruction adds
// 0x80000000 to the 64-bit product before taking bits 63:32.
// (Previous comment said 0x8000000 — one zero short of the actual
// rounding constant.)
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
{
int32_t out;
asm volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes sum + (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
// SMMLAR: multiply-accumulate variant of SMMULR — adds the rounded high
// 32 bits of the 64-bit product a*b to sum.  sum is passed as the
// accumulator operand (%1); a and b are the multiplicands.
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
int32_t out;
asm volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
return out;
}
// computes sum - (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
// SMMLSR: multiply-subtract variant of SMMULR — subtracts the rounded
// high 32 bits of the 64-bit product a*b from sum.
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
int32_t out;
asm volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
return out;
}
// computes (a[31:16] | (b[31:16] >> 16))
// PKHTB with asr #16: result's high halfword is a's high halfword (kept
// in place), result's low halfword is b's high halfword shifted down.
static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
{
int32_t out;
asm volatile("pkhtb %0, %1, %2, asr #16" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (a[31:16] | b[15:0])
// PKHTB with no shift: result's high halfword comes from a, low halfword
// comes from b.
static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
{
int32_t out;
asm volatile("pkhtb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes ((a[15:0] << 16) | b[15:0])
// PKHBT with lsl #16.  Note the asm inputs are deliberately swapped
// relative to the C parameters: b is the first operand (its low halfword
// stays in the low half), and a is the shifted operand (its low halfword
// lands in the high half).
static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
{
int32_t out;
asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
return out;
}
// computes ((a[15:0] << 16) | b[15:0])
// Identical to pack_16b_16b above (same PKHBT encoding, same swapped
// operand order); the duplicate name is kept so existing callers of
// either spelling continue to work.
static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16x16(int32_t a, int32_t b)
{
int32_t out;
asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
return out;
}
// computes (((a[31:16] + b[31:16]) << 16) | (a[15:0] + b[15:0]))
// QADD16: adds the two 16-bit lanes independently, with each sum
// SATURATED to the signed 16-bit range (-32768 .. 32767) rather than
// wrapping.  (Fixed a dropped bracket in the original comment.)
static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (sum + ((a[31:0] * b[15:0]) >> 16))
// SMLAWB: 32x16 multiply-accumulate using the LOW halfword of b; the top
// 32 bits of the 48-bit product are added to sum.
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
{
int32_t out;
asm volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
return out;
}
// computes (sum + ((a[31:0] * b[31:16]) >> 16))
// SMLAWT: 32x16 multiply-accumulate using the HIGH halfword of b; the top
// 32 bits of the 48-bit product are added to sum.
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
{
int32_t out;
asm volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
return out;
}
// computes logical and, forces compiler to allocate register and use single cycle instruction
// The inline asm pins the operation to one AND instruction on registers,
// preventing the compiler from folding or rearranging it.  a is a
// read-write operand ("+r"), so the result overwrites a's register.
static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline uint32_t logical_and(uint32_t a, uint32_t b)
{
asm volatile("and %0, %1" : "+r" (a) : "r" (b));
return a;
}
// computes ((a[15:0] * b[15:0]) + (a[31:16] * b[31:16]))
// SMUAD: dual 16x16 signed multiply, products added together — a one-
// instruction dot product of the two halfword lanes.
static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes ((a[15:0] * b[31:16]) + (a[31:16] * b[15:0]))
// SMUADX: dual 16x16 signed multiply with the halfwords of b exchanged
// (cross lanes), products added together.
static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (a[15:0] * b[15:0])
// SMULBB: signed 16x16 multiply of the low halfword of a by the low
// halfword of b, full 32-bit result.
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("smulbb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (a[15:0] * b[31:16])
// SMULBT: signed 16x16 multiply of the low halfword of a by the high
// halfword of b, full 32-bit result.
static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("smulbt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (a[31:16] * b[15:0])
// SMULTB: signed 16x16 multiply of the high halfword of a by the low
// halfword of b, full 32-bit result.
static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("smultb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (a[31:16] * b[31:16])
// SMULTT: signed 16x16 multiply of the high halfword of a by the high
// halfword of b, full 32-bit result.
static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("smultt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
// computes (a - b), result saturated to 32 bit integer range
// QSUB: signed 32-bit subtract saturated to INT32_MIN .. INT32_MAX
// instead of wrapping on overflow.
// NOTE(review): "substract" is a misspelling of "subtract", but the name
// is public API — keep it for compatibility with existing callers.
static inline int32_t substract_32_saturate(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t substract_32_saturate(uint32_t a, uint32_t b)
{
int32_t out;
asm volatile("qsub %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
return out;
}
  204. #endif