選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

371 行
13KB

  1. /* Audio Library for Teensy 3.X
  2. * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
  3. *
  4. * Development of this audio library was funded by PJRC.COM, LLC by sales of
  5. * Teensy and Audio Adaptor boards. Please support PJRC's efforts to develop
  6. * open source software by purchasing Teensy or other PJRC products.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice, development funding notice, and this permission
  16. * notice shall be included in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #ifndef dspinst_h_
  27. #define dspinst_h_
  28. #include <stdint.h>
  // Shifts val right by rshift and saturates the result to a signed
  // `bits`-bit range, i.e. clamps it to [-2^(bits-1), 2^(bits-1) - 1].
  // On ARMv7E-M this is one SSAT instruction; `bits` and `rshift` are passed
  // with the "I" (immediate) constraint there, so callers must supply
  // compile-time constants. Every other target takes the portable C path.
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
      return out;
  #else
      // 64-bit limit so that bits == 32 does not overflow the shift.
      const int64_t limit = (int64_t)1 << (bits - 1);
      // Relies on >> of a negative value being an arithmetic shift.
      const int64_t shifted = val >> rshift;
      if (shifted > limit - 1) {
          return (int32_t)(limit - 1);
      }
      if (shifted < -limit) {
          return (int32_t)(-limit);
      }
      return (int32_t)shifted;
  #endif
  }
  // Clamps val to the signed 16-bit range [-32768, 32767].
  // ARMv7E-M uses a single SSAT; other targets use plain comparisons
  // (previously this path was an unimplemented stub that returned 0).
  static inline int16_t saturate16(int32_t val) __attribute__((always_inline, unused));
  static inline int16_t saturate16(int32_t val)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t clamped;
      __asm__ __volatile__("ssat %0, %1, %2" : "=r" (clamped) : "I" (16), "r" (val));
      return (int16_t)(clamped & 0xffff);
  #else
      if (val > INT16_MAX) {
          return INT16_MAX;
      }
      if (val < INT16_MIN) {
          return INT16_MIN;
      }
      return (int16_t)val;
  #endif
  }
  // Computes (a[31:0] * b[15:0]) >> 16, treating the bottom halfword of b as
  // a signed 16-bit value. The top halfword of b is ignored.
  // The C path is now the default for every non-ARMv7E-M target, so the
  // function always returns a value.
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t bottom = (int16_t)(b & 0xFFFFu);
      return (int32_t)(((int64_t)a * bottom) >> 16);
  #endif
  }
  // Computes (a[31:0] * b[31:16]) >> 16, treating the top halfword of b as a
  // signed 16-bit value. The bottom halfword of b is ignored.
  // The C path is now the default for every non-ARMv7E-M target, so the
  // function always returns a value.
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int16_t top = (int16_t)(b >> 16);
      return (int32_t)(((int64_t)a * top) >> 16);
  #endif
  }
  // Computes ((int64_t)a * (int64_t)b) >> 32, i.e. the top 32 bits of the
  // signed 64-bit product (truncating, no rounding).
  // ARMv7E-M uses SMMUL; other targets use 64-bit C arithmetic (previously
  // this path was an unimplemented stub that returned 0).
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * b;
      return (int32_t)(product >> 32);
  #endif
  }
  // Computes ((int64_t)a * (int64_t)b + 0x80000000) >> 32, i.e. the top 32
  // bits of the signed 64-bit product, rounded by adding half an LSB first.
  // ARMv7E-M uses SMMULR; other targets use 64-bit C arithmetic (previously
  // this path was an unimplemented stub that returned 0).
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // |a * b| <= 2^62, so adding 2^31 cannot overflow int64_t.
      const int64_t rounded = (int64_t)a * b + 0x80000000LL;
      return (int32_t)(rounded >> 32);
  #endif
  }
  // Computes sum + (((int64_t)a * (int64_t)b + 0x80000000) >> 32); the final
  // addition wraps modulo 2^32.
  // ARMv7E-M uses SMMLAR; other targets use 64-bit C arithmetic (previously
  // this path was an unimplemented stub that returned 0).
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const int64_t rounded = (int64_t)a * b + 0x80000000LL;
      const uint32_t high = (uint32_t)((uint64_t)rounded >> 32);
      // Unsigned addition so that overflow wraps instead of being undefined.
      return (int32_t)((uint32_t)sum + high);
  #endif
  }
  // Computes ((((int64_t)sum << 32) - (int64_t)a * (int64_t)b + 0x80000000) >> 32),
  // i.e. sum minus the rounded high word of the product, with the rounding
  // constant applied after the subtraction (this is what SMMLSR does; it
  // differs from sum - round(a*b >> 32) when the low word is exactly half).
  // ARMv7E-M uses SMMLSR; other targets use 64-bit C arithmetic (previously
  // this path was an unimplemented stub that returned 0).
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // All arithmetic is done modulo 2^64 on unsigned values to avoid
      // undefined behaviour from shifting or overflowing signed integers.
      const uint64_t product = (uint64_t)((int64_t)a * b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) - product + 0x80000000u;
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Packs two top halfwords: result[31:16] = a[31:16], result[15:0] = b[31:16].
  // The C path is now the default for every non-ARMv7E-M target, so the
  // function always returns a value.
  static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("pkhtb %0, %1, %2, asr #16" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t upper = (uint32_t)a & 0xFFFF0000u;
      const uint32_t lower = (uint32_t)b >> 16;
      return upper | lower;
  #endif
  }
  // Packs a top and a bottom halfword:
  // result[31:16] = a[31:16], result[15:0] = b[15:0].
  // The C path is now the default for every non-ARMv7E-M target, so the
  // function always returns a value.
  static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("pkhtb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t upper = (uint32_t)a & 0xFFFF0000u;
      const uint32_t lower = (uint32_t)b & 0x0000FFFFu;
      return upper | lower;
  #endif
  }
  // Packs two bottom halfwords:
  // result[31:16] = a[15:0], result[15:0] = b[15:0].
  // The C path is now the default for every non-ARMv7E-M target, and shifts
  // an unsigned copy of `a` (left-shifting a negative int32_t is undefined).
  static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
      return out;
  #else
      const uint32_t upper = (uint32_t)a << 16;
      const uint32_t lower = (uint32_t)b & 0x0000FFFFu;
      return upper | lower;
  #endif
  }
  171. // computes ((a[15:0] << 16) | b[15:0])
  172. /*
  173. static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  174. static inline uint32_t pack_16x16(int32_t a, int32_t b)
  175. {
  176. int32_t out;
  177. asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
  178. return out;
  179. }
  180. */
  // Adds the two signed 16-bit lanes of a and b independently, saturating
  // each lane to [-32768, 32767]:
  // result = (sat(a[31:16] + b[31:16]) << 16) | sat(a[15:0] + b[15:0]).
  // ARMv7E-M uses QADD16; other targets use the portable C path below.
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      int32_t top = (int16_t)(a >> 16) + (int16_t)(b >> 16);
      int32_t bottom = (int16_t)(a & 0xFFFFu) + (int16_t)(b & 0xFFFFu);
      if (top > INT16_MAX) {
          top = INT16_MAX;
      } else if (top < INT16_MIN) {
          top = INT16_MIN;
      }
      if (bottom > INT16_MAX) {
          bottom = INT16_MAX;
      } else if (bottom < INT16_MIN) {
          bottom = INT16_MIN;
      }
      return ((uint32_t)top << 16) | ((uint32_t)bottom & 0xFFFFu);
  #endif
  }
  // Subtracts the two signed 16-bit lanes of b from those of a independently,
  // saturating each lane to [-32768, 32767]:
  // result = (sat(a[31:16] - b[31:16]) << 16) | sat(a[15:0] - b[15:0]).
  // ARMv7E-M uses QSUB16; other targets use the portable C path below.
  static inline int32_t signed_subtract_16_and_16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_subtract_16_and_16(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("qsub16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t ua = (uint32_t)a;
      const uint32_t ub = (uint32_t)b;
      int32_t top = (int16_t)(ua >> 16) - (int16_t)(ub >> 16);
      int32_t bottom = (int16_t)(ua & 0xFFFFu) - (int16_t)(ub & 0xFFFFu);
      if (top > INT16_MAX) {
          top = INT16_MAX;
      } else if (top < INT16_MIN) {
          top = INT16_MIN;
      }
      if (bottom > INT16_MAX) {
          bottom = INT16_MAX;
      } else if (bottom < INT16_MIN) {
          bottom = INT16_MIN;
      }
      return (int32_t)(((uint32_t)top << 16) | ((uint32_t)bottom & 0xFFFFu));
  #endif
  }
  // Adds the two signed 16-bit lanes of a and b independently and halves each
  // sum (arithmetic shift right by one, so no overflow is possible):
  // result = (((a[31:16] + b[31:16]) >> 1) << 16) | ((a[15:0] + b[15:0]) >> 1).
  // ARMv7E-M uses SHADD16; other targets use the portable C path below.
  static inline int32_t signed_halving_add_16_and_16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_halving_add_16_and_16(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("shadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t ua = (uint32_t)a;
      const uint32_t ub = (uint32_t)b;
      // Relies on >> of a negative value being an arithmetic shift.
      const int32_t top = ((int16_t)(ua >> 16) + (int16_t)(ub >> 16)) >> 1;
      const int32_t bottom = ((int16_t)(ua & 0xFFFFu) + (int16_t)(ub & 0xFFFFu)) >> 1;
      return (int32_t)(((uint32_t)top << 16) | ((uint32_t)bottom & 0xFFFFu));
  #endif
  }
  // Subtracts the two signed 16-bit lanes of b from those of a independently
  // and halves each difference (arithmetic shift right by one):
  // result = (((a[31:16] - b[31:16]) >> 1) << 16) | ((a[15:0] - b[15:0]) >> 1).
  // ARMv7E-M uses SHSUB16; other targets use the portable C path below.
  static inline int32_t signed_halving_subtract_16_and_16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_halving_subtract_16_and_16(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("shsub16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t ua = (uint32_t)a;
      const uint32_t ub = (uint32_t)b;
      // Relies on >> of a negative value being an arithmetic shift.
      const int32_t top = ((int16_t)(ua >> 16) - (int16_t)(ub >> 16)) >> 1;
      const int32_t bottom = ((int16_t)(ua & 0xFFFFu) - (int16_t)(ub & 0xFFFFu)) >> 1;
      return (int32_t)(((uint32_t)top << 16) | ((uint32_t)bottom & 0xFFFFu));
  #endif
  }
  // Computes sum + ((a[31:0] * b[15:0]) >> 16), treating the bottom halfword
  // of b as signed; the final addition wraps modulo 2^32.
  // ARMv7E-M uses SMLAWB; other targets use the portable C path below.
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * (int16_t)(b & 0xFFFFu);
      // Unsigned addition so that overflow wraps instead of being undefined.
      return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
  #endif
  }
  // Computes sum + ((a[31:0] * b[31:16]) >> 16), treating the top halfword of
  // b as signed; the final addition wraps modulo 2^32.
  // ARMv7E-M uses SMLAWT; other targets use the portable C path below.
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * (int16_t)(b >> 16);
      // Unsigned addition so that overflow wraps instead of being undefined.
      return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
  #endif
  }
  // Computes a & b. On ARMv7E-M the operation is written as inline asm so the
  // compiler must keep both operands in registers and emit a register AND;
  // other targets simply use the C operator.
  static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t logical_and(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      __asm__ __volatile__("and %0, %1" : "+r" (a) : "r" (b));
      return a;
  #else
      return a & b;
  #endif
  }
  // Computes (a[15:0] * b[15:0]) + (a[31:16] * b[31:16]) with all halfwords
  // treated as signed 16-bit values; the sum wraps modulo 2^32.
  // ARMv7E-M uses SMUAD; other targets use the portable C path below.
  static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int32_t bottom = (int16_t)(a & 0xFFFFu) * (int16_t)(b & 0xFFFFu);
      const int32_t top = (int16_t)(a >> 16) * (int16_t)(b >> 16);
      // Only 0x8000 * 0x8000 in both lanes overflows; wrap like the hardware.
      return (int32_t)((uint32_t)bottom + (uint32_t)top);
  #endif
  }
  // Computes (a[15:0] * b[31:16]) + (a[31:16] * b[15:0]) with all halfwords
  // treated as signed 16-bit values; the sum wraps modulo 2^32.
  // ARMv7E-M uses SMUADX; other targets use the portable C path below.
  static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int32_t bottom_top = (int16_t)(a & 0xFFFFu) * (int16_t)(b >> 16);
      const int32_t top_bottom = (int16_t)(a >> 16) * (int16_t)(b & 0xFFFFu);
      return (int32_t)((uint32_t)bottom_top + (uint32_t)top_bottom);
  #endif
  }
  // Computes sum + (a[15:0] * b[15:0]) + (a[31:16] * b[31:16]) with a 64-bit
  // accumulator; halfwords are treated as signed 16-bit values.
  // ARMv7E-M uses SMLALD; other targets use the portable C path below.
  static inline int64_t multiply_accumulate_16tx16t_add_16bx16b(int64_t sum, uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int64_t multiply_accumulate_16tx16t_add_16bx16b(int64_t sum, uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      // %Q0 / %R0 select the low / high 32-bit halves of the 64-bit operand.
      __asm__ __volatile__("smlald %Q0, %R0, %1, %2" : "+r" (sum) : "r" (a), "r" (b));
      return sum;
  #else
      const int64_t bottom = (int64_t)(int16_t)(a & 0xFFFFu) * (int16_t)(b & 0xFFFFu);
      const int64_t top = (int64_t)(int16_t)(a >> 16) * (int16_t)(b >> 16);
      // Unsigned addition so that accumulator overflow wraps modulo 2^64.
      return (int64_t)((uint64_t)sum + (uint64_t)bottom + (uint64_t)top);
  #endif
  }
  // Computes sum + (a[15:0] * b[31:16]) + (a[31:16] * b[15:0]) with a 64-bit
  // accumulator; halfwords are treated as signed 16-bit values.
  // ARMv7E-M uses SMLALDX; other targets use the portable C path below.
  static inline int64_t multiply_accumulate_16tx16b_add_16bx16t(int64_t sum, uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int64_t multiply_accumulate_16tx16b_add_16bx16t(int64_t sum, uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      // %Q0 / %R0 select the low / high 32-bit halves of the 64-bit operand.
      __asm__ __volatile__("smlaldx %Q0, %R0, %1, %2" : "+r" (sum) : "r" (a), "r" (b));
      return sum;
  #else
      const int64_t bottom_top = (int64_t)(int16_t)(a & 0xFFFFu) * (int16_t)(b >> 16);
      const int64_t top_bottom = (int64_t)(int16_t)(a >> 16) * (int16_t)(b & 0xFFFFu);
      // Unsigned addition so that accumulator overflow wraps modulo 2^64.
      return (int64_t)((uint64_t)sum + (uint64_t)bottom_top + (uint64_t)top_bottom);
  #endif
  }
  // Computes (a[15:0] * b[15:0]) with both halfwords treated as signed
  // 16-bit values. The top halfwords are ignored.
  // ARMv7E-M uses SMULBB; other targets use the portable C path below.
  static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smulbb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      return (int32_t)(int16_t)(a & 0xFFFFu) * (int16_t)(b & 0xFFFFu);
  #endif
  }
  // Computes (a[15:0] * b[31:16]) with both halfwords treated as signed
  // 16-bit values.
  // ARMv7E-M uses SMULBT; other targets use the portable C path below.
  static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smulbt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      return (int32_t)(int16_t)(a & 0xFFFFu) * (int16_t)(b >> 16);
  #endif
  }
  // Computes (a[31:16] * b[15:0]) with both halfwords treated as signed
  // 16-bit values.
  // ARMv7E-M uses SMULTB; other targets use the portable C path below.
  static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smultb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      return (int32_t)(int16_t)(a >> 16) * (int16_t)(b & 0xFFFFu);
  #endif
  }
  // Computes (a[31:16] * b[31:16]) with both halfwords treated as signed
  // 16-bit values. The bottom halfwords are ignored.
  // ARMv7E-M uses SMULTT; other targets use the portable C path below.
  static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("smultt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      return (int32_t)(int16_t)(a >> 16) * (int16_t)(b >> 16);
  #endif
  }
  // Computes (a - b) with both operands interpreted as signed 32-bit values;
  // the result is saturated to [INT32_MIN, INT32_MAX] instead of wrapping.
  // ARMv7E-M uses QSUB; other targets use the portable C path below.
  static inline int32_t substract_32_saturate(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t substract_32_saturate(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t out;
      __asm__ __volatile__("qsub %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Widen to 64 bits so the raw difference cannot overflow.
      const int64_t diff = (int64_t)(int32_t)a - (int64_t)(int32_t)b;
      if (diff > INT32_MAX) {
          return INT32_MAX;
      }
      if (diff < INT32_MIN) {
          return INT32_MIN;
      }
      return (int32_t)diff;
  #endif
  }
  // Returns the Q flag (bit 27) of the APSR as 0 or 1.
  // The asm is volatile: the flag is changed by other instructions that the
  // compiler does not see as inputs here, so the read must not be cached,
  // hoisted, merged with an earlier read, or dropped.
  // Targets other than ARMv7E-M report 0.
  // NOTE(review): assumes those targets have no Q flag to read - confirm.
  static inline uint32_t get_q_psr(void) __attribute__((always_inline, unused));
  static inline uint32_t get_q_psr(void)
  {
  #if defined(__ARM_ARCH_7EM__)
      uint32_t apsr;
      __asm__ __volatile__("mrs %0, APSR" : "=r" (apsr));
      return (apsr >> 27) & 1u;
  #else
      return 0;
  #endif
  }
  // Clears the Q flag in the APSR by writing zero to APSR_nzcvq. This also
  // clears the N, Z, C and V condition flags, hence the "cc" clobber.
  // The asm is volatile: its only output is a scratch register that nothing
  // reads, so without volatile the compiler is free to delete the statement.
  // Targets other than ARMv7E-M do nothing.
  // NOTE(review): assumes those targets have no Q flag to clear - confirm.
  static inline void clr_q_psr(void) __attribute__((always_inline, unused));
  static inline void clr_q_psr(void)
  {
  #if defined(__ARM_ARCH_7EM__)
      uint32_t zero;
      __asm__ __volatile__("mov %[t],#0\n"
                           "msr APSR_nzcvq,%[t]\n"
                           : [t] "=&r" (zero) : : "cc");
  #endif
  }
  // Multiplies two S.31 fractional integers and returns the 32 most
  // significant bits of the product after shifting it left by z, i.e. the low
  // 32 bits of (((int64_t)x * y) >> (31 - z)). Intended for 0 <= z < 31.
  // This comes from rockbox.org.
  // ARMv7E-M uses SMULL plus two shifts; the "M" constraint lets constant
  // shift amounts be encoded as immediates. Other targets use 64-bit C.
  static inline int32_t FRACMUL_SHL(int32_t x, int32_t y, int z) __attribute__((always_inline, unused));
  static inline int32_t FRACMUL_SHL(int32_t x, int32_t y, int z)
  {
  #if defined(__ARM_ARCH_7EM__)
      int32_t lo, hi;
      __asm__ ("smull    %[t], %[t2], %[a], %[b]\n\t"
               "mov      %[t2], %[t2], asl %[c]\n\t"
               "orr      %[t], %[t2], %[t], lsr %[d]\n\t"
               : [t] "=&r" (lo), [t2] "=&r" (hi)
               : [a] "r" (x), [b] "r" (y),
                 [c] "Mr" ((z) + 1), [d] "Mr" (31 - (z)));
      return lo;
  #else
      // Shift the product as unsigned: only the low 32 bits are kept, and
      // they are identical for logical and arithmetic shifts here.
      const uint64_t product = (uint64_t)((int64_t)x * y);
      return (int32_t)(uint32_t)(product >> (31 - z));
  #endif
  }
  334. #endif