選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

267 行
9.4KB

  1. /* Audio Library for Teensy 3.X
  2. * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
  3. *
  4. * Development of this audio library was funded by PJRC.COM, LLC by sales of
  5. * Teensy and Audio Adaptor boards. Please support PJRC's efforts to develop
  6. * open source software by purchasing Teensy or other PJRC products.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice, development funding notice, and this permission
  16. * notice shall be included in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #ifndef dspinst_h_
  27. #define dspinst_h_
  28. #include <stdint.h>
  // Computes limit((val >> rshift), 2**bits): shifts val right by rshift and
  // saturates the result to the signed range [-2**(bits-1), 2**(bits-1) - 1].
  //   val    - value to shift and saturate
  //   bits   - width in bits of the saturated result (1 to 32)
  //   rshift - right shift applied before saturating
  // On KINETISK this is one SSAT instruction whose bits/rshift operands are
  // immediates, so they must be compile-time constants there. Every other
  // target (including KINETISL) uses the C version below.
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
      return out;
  #else
      // Build the limits in unsigned arithmetic so bits == 32 cannot overflow.
      const int32_t max = (int32_t)(((uint32_t)1 << (bits - 1)) - 1u);
      const int32_t min = -max - 1;
      // Relies on >> of a negative value being an arithmetic shift (as in GCC).
      const int32_t out = val >> rshift;
      if (out > max) {
          return max;
      }
      if (out < min) {
          return min;
      }
      return out;
  #endif
  }
  // Computes ((a[31:0] * b[15:0]) >> 16).
  //   a - signed 32 bit multiplicand
  //   b - only the low halfword is used, interpreted as a signed 16 bit value
  // Returns bits [47:16] of the 48 bit product. KINETISK uses SMULWB; all
  // other targets use the equivalent 64 bit C expression.
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t b_low = (int16_t)(b & 0xFFFF);
      return (int32_t)(((int64_t)a * b_low) >> 16);
  #endif
  }
  // Computes ((a[31:0] * b[31:16]) >> 16).
  //   a - signed 32 bit multiplicand
  //   b - only the high halfword is used, interpreted as a signed 16 bit value
  // Returns bits [47:16] of the 48 bit product. KINETISK uses SMULWT; all
  // other targets use the equivalent 64 bit C expression.
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t b_high = (int16_t)(b >> 16);
      return (int32_t)(((int64_t)a * b_high) >> 16);
  #endif
  }
  // Computes (((int64_t)a[31:0] * (int64_t)b[31:0]) >> 32), i.e. the upper
  // 32 bits of the signed 64 bit product of a and b.
  // KINETISK uses SMMUL; all other targets use the equivalent C expression
  // so the function also builds where that instruction is unavailable.
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Relies on >> of a negative value being an arithmetic shift (as in GCC).
      return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
  #endif
  }
  // Computes (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32), i.e.
  // the upper 32 bits of the signed 64 bit product, rounded by adding half of
  // the discarded range before the shift.
  // KINETISK uses SMMULR; all other targets use the equivalent C expression.
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // |a * b| is at most 2**62, so adding 2**31 cannot overflow int64_t.
      const int64_t rounded = (int64_t)a * (int64_t)b + 0x80000000LL;
      return (int32_t)(rounded >> 32);
  #endif
  }
  // Computes sum + (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32).
  //   sum  - 32 bit accumulator
  //   a, b - signed 32 bit factors
  // The addition wraps to 32 bits; it does not saturate.
  // KINETISK uses SMMLAR; all other targets use the equivalent C expression.
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const int64_t rounded = (int64_t)a * (int64_t)b + 0x80000000LL;
      // Accumulate in unsigned arithmetic so wrap-around is well defined.
      return (int32_t)((uint32_t)sum + (uint32_t)(rounded >> 32));
  #endif
  }
  // Computes ((((int64_t)sum << 32) - (int64_t)a[31:0] * (int64_t)b[31:0]
  //            + 0x80000000) >> 32),
  // which is sum minus the upper 32 bits of the product a * b, with the
  // rounding constant applied after the subtraction (SMMLSR semantics).
  //   sum  - 32 bit accumulator
  //   a, b - signed 32 bit factors
  // The result wraps to 32 bits; it does not saturate.
  // KINETISK uses SMMLSR; all other targets use the equivalent C expression.
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // (sum << 32) only affects the upper word, so it can be added after the
      // shift; the rounding constant must stay with the negated product.
      const int64_t rounded = 0x80000000LL - (int64_t)a * (int64_t)b;
      // Accumulate in unsigned arithmetic so wrap-around is well defined.
      return (int32_t)((uint32_t)sum + (uint32_t)(rounded >> 32));
  #endif
  }
  // Computes (a[31:16] | (b[31:16] >> 16)): the result's high halfword is the
  // high halfword of a and its low halfword is the high halfword of b.
  // KINETISK uses PKHTB; all other targets use the equivalent C expression.
  static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("pkhtb %0, %1, %2, asr #16" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t high = (uint32_t)a & 0xFFFF0000u;
      const uint32_t low = (uint32_t)b >> 16;
      return high | low;
  #endif
  }
  // Computes (a[31:16] | b[15:0]): the result's high halfword is the high
  // halfword of a and its low halfword is the low halfword of b.
  // KINETISK uses PKHTB; all other targets use the equivalent C expression.
  static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("pkhtb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t high = (uint32_t)a & 0xFFFF0000u;
      const uint32_t low = (uint32_t)b & 0x0000FFFFu;
      return high | low;
  #endif
  }
  // Computes ((a[15:0] << 16) | b[15:0]): the result's high halfword is the
  // low halfword of a and its low halfword is the low halfword of b.
  // KINETISK uses PKHBT; all other targets use the equivalent C expression.
  static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
      return out;
  #else
      // Shift as unsigned: left-shifting a negative signed value is undefined.
      const uint32_t high = (uint32_t)a << 16;
      const uint32_t low = (uint32_t)b & 0x0000FFFFu;
      return high | low;
  #endif
  }
  141. // computes ((a[15:0] << 16) | b[15:0])
  142. /*
  143. static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  144. static inline uint32_t pack_16x16(int32_t a, int32_t b)
  145. {
  146. int32_t out;
  147. asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
  148. return out;
  149. }
  150. */
  // Computes (((a[31:16] + b[31:16]) << 16) | (a[15:0] + b[15:0])), where each
  // halfword is treated as a signed 16 bit value and each sum is saturated to
  // the range [-32768, 32767] independently of the other.
  // KINETISK uses QADD16; all other targets use the equivalent C code.
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      int32_t low = (int32_t)(int16_t)(a & 0xFFFF) + (int32_t)(int16_t)(b & 0xFFFF);
      int32_t high = (int32_t)(int16_t)(a >> 16) + (int32_t)(int16_t)(b >> 16);
      if (low > INT16_MAX) {
          low = INT16_MAX;
      } else if (low < INT16_MIN) {
          low = INT16_MIN;
      }
      if (high > INT16_MAX) {
          high = INT16_MAX;
      } else if (high < INT16_MIN) {
          high = INT16_MIN;
      }
      return ((uint32_t)high << 16) | ((uint32_t)low & 0x0000FFFFu);
  #endif
  }
  // Computes (sum + ((a[31:0] * b[15:0]) >> 16)).
  //   sum - 32 bit accumulator
  //   a   - signed 32 bit multiplicand
  //   b   - only the low halfword is used, interpreted as a signed 16 bit value
  // The addition wraps to 32 bits; it does not saturate.
  // KINETISK uses SMLAWB; all other targets use the equivalent C expression.
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * (int16_t)(b & 0xFFFF);
      // Accumulate in unsigned arithmetic so wrap-around is well defined.
      return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
  #endif
  }
  // Computes (sum + ((a[31:0] * b[31:16]) >> 16)).
  //   sum - 32 bit accumulator
  //   a   - signed 32 bit multiplicand
  //   b   - only the high halfword is used, interpreted as a signed 16 bit value
  // The addition wraps to 32 bits; it does not saturate.
  // KINETISK uses SMLAWT; all other targets use the equivalent C expression.
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * (int16_t)(b >> 16);
      // Accumulate in unsigned arithmetic so wrap-around is well defined.
      return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
  #endif
  }
  // Computes the bitwise AND of a and b.
  // On KINETISK the inline asm forces both operands into registers and emits
  // a single AND instruction instead of leaving the choice to the compiler.
  // Every other target uses the plain C operator, which yields the same value.
  static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t logical_and(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      __asm__ volatile("and %0, %1" : "+r" (a) : "r" (b));
      return a;
  #else
      return a & b;
  #endif
  }
  // Computes ((a[15:0] * b[15:0]) + (a[31:16] * b[31:16])), treating every
  // halfword as a signed 16 bit value.
  // The sum wraps to 32 bits; the only input that overflows is all four
  // halfwords equal to 0x8000.
  // KINETISK uses SMUAD; all other targets use the equivalent C expression.
  static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int32_t bottom = (int32_t)(int16_t)(a & 0xFFFF) * (int32_t)(int16_t)(b & 0xFFFF);
      const int32_t top = (int32_t)(int16_t)(a >> 16) * (int32_t)(int16_t)(b >> 16);
      // Add in unsigned arithmetic so the single overflow case wraps.
      return (int32_t)((uint32_t)bottom + (uint32_t)top);
  #endif
  }
  // Computes ((a[15:0] * b[31:16]) + (a[31:16] * b[15:0])), treating every
  // halfword as a signed 16 bit value.
  // The sum wraps to 32 bits; the only input that overflows is all four
  // halfwords equal to 0x8000.
  // KINETISK uses SMUADX; all other targets use the equivalent C expression.
  static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int32_t bottom_top = (int32_t)(int16_t)(a & 0xFFFF) * (int32_t)(int16_t)(b >> 16);
      const int32_t top_bottom = (int32_t)(int16_t)(a >> 16) * (int32_t)(int16_t)(b & 0xFFFF);
      // Add in unsigned arithmetic so the single overflow case wraps.
      return (int32_t)((uint32_t)bottom_top + (uint32_t)top_bottom);
  #endif
  }
  // Computes (a[15:0] * b[15:0]), treating both low halfwords as signed
  // 16 bit values; the high halfwords of a and b are ignored.
  // KINETISK uses SMULBB; all other targets use the equivalent C expression.
  static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smulbb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t a_low = (int16_t)(a & 0xFFFF);
      const int16_t b_low = (int16_t)(b & 0xFFFF);
      return (int32_t)a_low * (int32_t)b_low;
  #endif
  }
  // Computes (a[15:0] * b[31:16]), treating the low halfword of a and the
  // high halfword of b as signed 16 bit values; the other halfwords are
  // ignored.
  // KINETISK uses SMULBT; all other targets use the equivalent C expression.
  static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smulbt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t a_low = (int16_t)(a & 0xFFFF);
      const int16_t b_high = (int16_t)(b >> 16);
      return (int32_t)a_low * (int32_t)b_high;
  #endif
  }
  // Computes (a[31:16] * b[15:0]), treating the high halfword of a and the
  // low halfword of b as signed 16 bit values; the other halfwords are
  // ignored.
  // KINETISK uses SMULTB; all other targets use the equivalent C expression.
  static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smultb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t a_high = (int16_t)(a >> 16);
      const int16_t b_low = (int16_t)(b & 0xFFFF);
      return (int32_t)a_high * (int32_t)b_low;
  #endif
  }
  // Computes (a[31:16] * b[31:16]), treating both high halfwords as signed
  // 16 bit values; the low halfwords of a and b are ignored.
  // KINETISK uses SMULTT; all other targets use the equivalent C expression.
  static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("smultt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t a_high = (int16_t)(a >> 16);
      const int16_t b_high = (int16_t)(b >> 16);
      return (int32_t)a_high * (int32_t)b_high;
  #endif
  }
  // Computes (a - b) with both operands interpreted as signed 32 bit values;
  // the result is saturated to the 32 bit signed integer range instead of
  // wrapping.
  // KINETISK uses QSUB; all other targets use the equivalent C code.
  static inline int32_t substract_32_saturate(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t substract_32_saturate(uint32_t a, uint32_t b)
  {
  #if defined(KINETISK)
      int32_t out;
      __asm__ volatile("qsub %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Subtract in 64 bits so the exact difference is available for clamping.
      const int64_t difference = (int64_t)(int32_t)a - (int64_t)(int32_t)b;
      if (difference > INT32_MAX) {
          return INT32_MAX;
      }
      if (difference < INT32_MIN) {
          return INT32_MIN;
      }
      return (int32_t)difference;
  #endif
  }
  238. #endif