Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

374 linhas
13KB

  1. /* Audio Library for Teensy 3.X
  2. * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
  3. *
  4. * Development of this audio library was funded by PJRC.COM, LLC by sales of
  5. * Teensy and Audio Adaptor boards. Please support PJRC's efforts to develop
  6. * open source software by purchasing Teensy or other PJRC products.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice, development funding notice, and this permission
  16. * notice shall be included in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #ifndef dspinst_h_
  27. #define dspinst_h_
  28. #include <stdint.h>
  // Shifts val right by rshift bits, then clamps the result to the signed range
  // of a `bits`-wide integer: [-2**(bits-1), 2**(bits-1) - 1].
  //
  // On ARMv7E-M this is a single SSAT instruction; `bits` and `rshift` are passed
  // through "I" (immediate) constraints, so they have to be compile-time
  // constants at the call site. Every other target uses the portable C path.
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
      return out;
  #else
      // Build the upper bound with an unsigned shift so that bits == 32 does not
      // overflow a signed int.
      const int32_t max = (int32_t)((UINT32_C(1) << (bits - 1)) - 1u);
      const int32_t min = -max - 1;
      // Right shift of a negative value is implementation-defined in C; this
      // relies on the usual arithmetic (sign-propagating) shift.
      const int32_t shifted = val >> rshift;

      if (shifted > max) {
          return max;
      }
      if (shifted < min) {
          return min;
      }
      return shifted;
  #endif
  }
  // Clamps val to the signed 16-bit range [-32768, 32767] and returns it as
  // int16_t.
  //
  // ARMv7E-M uses SSAT with a 16-bit width; all other targets use a plain C
  // clamp (the previous non-ARMv7E-M path was an unimplemented stub that always
  // returned 0).
  static inline int16_t saturate16(int32_t val) __attribute__((always_inline, unused));
  static inline int16_t saturate16(int32_t val)
  {
  #if defined (__ARM_ARCH_7EM__)
      int16_t out;
      int32_t tmp;
      asm volatile("ssat %0, %1, %2" : "=r" (tmp) : "I" (16), "r" (val) );
      out = (int16_t) (tmp & 0xffff); // keep only the low 16 bits of the saturated value
      return out;
  #else
      if (val > INT16_MAX) {
          return INT16_MAX;
      }
      if (val < INT16_MIN) {
          return INT16_MIN;
      }
      return (int16_t)val;
  #endif
  }
  // Multiplies the signed 32-bit a by the signed bottom halfword of b and returns
  // the product shifted right by 16: ((a[31:0] * b[15:0]) >> 16).
  //
  // ARMv7E-M uses SMULWB; every other target uses the equivalent 64-bit C
  // expression, so the function always returns a value.
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Reinterpret the low 16 bits of b as a signed halfword.
      const int16_t bottom = (int16_t)(b & 0xFFFF);
      return (int32_t)(((int64_t)a * bottom) >> 16);
  #endif
  }
  // Multiplies the signed 32-bit a by the signed top halfword of b and returns
  // the product shifted right by 16: ((a[31:0] * b[31:16]) >> 16).
  //
  // ARMv7E-M uses SMULWT; every other target uses the equivalent 64-bit C
  // expression, so the function always returns a value.
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Reinterpret the high 16 bits of b as a signed halfword.
      const int16_t top = (int16_t)(b >> 16);
      return (int32_t)(((int64_t)a * top) >> 16);
  #endif
  }
  // Returns the upper 32 bits of the signed 64-bit product of a and b:
  // (((int64_t)a * (int64_t)b) >> 32). No rounding is applied.
  //
  // ARMv7E-M uses SMMUL; every other target uses the C expression, so the
  // function always returns a value.
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * (int64_t)b;
      return (int32_t)(product >> 32);
  #endif
  }
  // Returns the upper 32 bits of the signed 64-bit product of a and b, rounded
  // to nearest: (((int64_t)a * (int64_t)b + 0x80000000) >> 32).
  //
  // ARMv7E-M uses SMMULR. The C path adds 0x80000000 (half of 2**32) before the
  // shift so it rounds the same way as the instruction; the earlier constant
  // 0x8000000 was one hex digit short and barely rounded at all.
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * (int64_t)b;
      return (int32_t)((product + INT64_C(0x80000000)) >> 32);
  #endif
  }
  // Computes sum + (((int64_t)a * (int64_t)b + 0x80000000) >> 32), i.e. adds
  // the rounded upper 32 bits of the product a*b to sum.
  //
  // ARMv7E-M uses SMMLAR. The C path mirrors the instruction: it forms
  // (sum << 32) + a*b + 0x80000000 as a 64-bit value and keeps bits [63:32].
  // The arithmetic is done on unsigned 64-bit values so that overflow wraps
  // instead of being undefined. The earlier rounding constant 0x8000000 was
  // one hex digit short.
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const uint64_t product = (uint64_t)((int64_t)a * (int64_t)b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) + product + UINT64_C(0x80000000);
      // Converting the top word back to int32_t relies on two's-complement
      // wraparound (implementation-defined, but what GCC and Clang do).
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Subtracts the product a*b from sum with rounding, keeping 32 bits:
  // ((((int64_t)sum << 32) - (int64_t)a * (int64_t)b + 0x80000000) >> 32).
  //
  // ARMv7E-M uses SMMLSR, which applies the rounding constant AFTER the
  // subtraction. The C path mirrors that order; rounding the product first and
  // then subtracting gives a result that is off by one on exact ties. The
  // earlier rounding constant 0x8000000 was also one hex digit short.
  // Unsigned 64-bit arithmetic is used so that overflow wraps instead of being
  // undefined.
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      const uint64_t product = (uint64_t)((int64_t)a * (int64_t)b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) - product + UINT64_C(0x80000000);
      // Converting the top word back to int32_t relies on two's-complement
      // wraparound (implementation-defined, but what GCC and Clang do).
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Packs two top halfwords into one word: the result's upper 16 bits are
  // a[31:16] and its lower 16 bits are b[31:16].
  //
  // ARMv7E-M uses PKHTB with an arithmetic shift of 16; every other target uses
  // the equivalent mask-and-shift, so the function always returns a value.
  static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("pkhtb %0, %1, %2, asr #16" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t upper = (uint32_t)a & 0xFFFF0000u;
      const uint32_t lower = (uint32_t)b >> 16;
      return upper | lower;
  #endif
  }
  // Packs the top halfword of a with the bottom halfword of b: the result's
  // upper 16 bits are a[31:16] and its lower 16 bits are b[15:0].
  //
  // ARMv7E-M uses PKHTB; every other target uses the equivalent masking, so the
  // function always returns a value.
  static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("pkhtb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const uint32_t upper = (uint32_t)a & 0xFFFF0000u;
      const uint32_t lower = (uint32_t)b & 0x0000FFFFu;
      return upper | lower;
  #endif
  }
  // Packs two bottom halfwords into one word: the result's upper 16 bits are
  // a[15:0] and its lower 16 bits are b[15:0].
  //
  // ARMv7E-M uses PKHBT with a left shift of 16. The C path shifts an unsigned
  // copy of a, because left-shifting a negative (or overflowing) signed int is
  // undefined behaviour in C.
  static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
  {
  #if defined (__ARM_ARCH_7EM__)
      int32_t out;
      asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
      return out;
  #else
      const uint32_t upper = (uint32_t)a << 16;
      const uint32_t lower = (uint32_t)b & 0x0000FFFFu;
      return upper | lower;
  #endif
  }
  171. // computes ((a[15:0] << 16) | b[15:0])
  172. /*
  173. static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  174. static inline uint32_t pack_16x16(int32_t a, int32_t b)
  175. {
  176. int32_t out;
  177. asm volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
  178. return out;
  179. }
  180. */
  181. #if defined (__ARM_ARCH_7EM__)
  182. // computes (((a[31:16] + b[31:16]) << 16) | (a[15:0 + b[15:0])) (saturates)
  183. static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  184. static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
  185. {
  186. int32_t out;
  187. asm volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  188. return out;
  189. }
  190. // computes (((a[31:16] - b[31:16]) << 16) | (a[15:0 - b[15:0])) (saturates)
  191. static inline int32_t signed_subtract_16_and_16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  192. static inline int32_t signed_subtract_16_and_16(int32_t a, int32_t b)
  193. {
  194. int32_t out;
  195. asm volatile("qsub16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  196. return out;
  197. }
  198. // computes out = (((a[31:16]+b[31:16])/2) <<16) | ((a[15:0]+b[15:0])/2)
  199. static inline int32_t signed_halving_add_16_and_16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  200. static inline int32_t signed_halving_add_16_and_16(int32_t a, int32_t b)
  201. {
  202. int32_t out;
  203. asm volatile("shadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  204. return out;
  205. }
  206. // computes out = (((a[31:16]-b[31:16])/2) <<16) | ((a[15:0]-b[15:0])/2)
  207. static inline int32_t signed_halving_subtract_16_and_16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  208. static inline int32_t signed_halving_subtract_16_and_16(int32_t a, int32_t b)
  209. {
  210. int32_t out;
  211. asm volatile("shsub16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  212. return out;
  213. }
  214. // computes (sum + ((a[31:0] * b[15:0]) >> 16))
  215. static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  216. static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
  217. {
  218. int32_t out;
  219. asm volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
  220. return out;
  221. }
  222. // computes (sum + ((a[31:0] * b[31:16]) >> 16))
  223. static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  224. static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
  225. {
  226. int32_t out;
  227. asm volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
  228. return out;
  229. }
  230. // computes logical and, forces compiler to allocate register and use single cycle instruction
  231. static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  232. static inline uint32_t logical_and(uint32_t a, uint32_t b)
  233. {
  234. asm volatile("and %0, %1" : "+r" (a) : "r" (b));
  235. return a;
  236. }
  237. // computes ((a[15:0] * b[15:0]) + (a[31:16] * b[31:16]))
  238. static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  239. static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
  240. {
  241. int32_t out;
  242. asm volatile("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  243. return out;
  244. }
  245. // computes ((a[15:0] * b[31:16]) + (a[31:16] * b[15:0]))
  246. static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  247. static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
  248. {
  249. int32_t out;
  250. asm volatile("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  251. return out;
  252. }
  253. // // computes sum += ((a[15:0] * b[15:0]) + (a[31:16] * b[31:16]))
  254. static inline int64_t multiply_accumulate_16tx16t_add_16bx16b(int64_t sum, uint32_t a, uint32_t b)
  255. {
  256. asm volatile("smlald %Q0, %R0, %1, %2" : "+r" (sum) : "r" (a), "r" (b));
  257. return sum;
  258. }
  259. // // computes sum += ((a[15:0] * b[31:16]) + (a[31:16] * b[15:0]))
  260. static inline int64_t multiply_accumulate_16tx16b_add_16bx16t(int64_t sum, uint32_t a, uint32_t b)
  261. {
  262. asm volatile("smlaldx %Q0, %R0, %1, %2" : "+r" (sum) : "r" (a), "r" (b));
  263. return sum;
  264. }
  265. // computes ((a[15:0] * b[15:0])
  266. static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  267. static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)
  268. {
  269. int32_t out;
  270. asm volatile("smulbb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  271. return out;
  272. }
  273. // computes ((a[15:0] * b[31:16])
  274. static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  275. static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b)
  276. {
  277. int32_t out;
  278. asm volatile("smulbt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  279. return out;
  280. }
  281. // computes ((a[31:16] * b[15:0])
  282. static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  283. static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b)
  284. {
  285. int32_t out;
  286. asm volatile("smultb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  287. return out;
  288. }
  289. // computes ((a[31:16] * b[31:16])
  290. static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  291. static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b)
  292. {
  293. int32_t out;
  294. asm volatile("smultt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  295. return out;
  296. }
  297. // computes (a - b), result saturated to 32 bit integer range
  298. static inline int32_t substract_32_saturate(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  299. static inline int32_t substract_32_saturate(uint32_t a, uint32_t b)
  300. {
  301. int32_t out;
  302. asm volatile("qsub %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
  303. return out;
  304. }
  305. // Multiply two S.31 fractional integers, and return the 32 most significant
  306. // bits after a shift left by the constant z.
  307. // This comes from rockbox.org
  308. static inline int32_t FRACMUL_SHL(int32_t x, int32_t y, int z)
  309. {
  310. int32_t t, t2;
  311. asm ("smull %[t], %[t2], %[a], %[b]\n\t"
  312. "mov %[t2], %[t2], asl %[c]\n\t"
  313. "orr %[t], %[t2], %[t], lsr %[d]\n\t"
  314. : [t] "=&r" (t), [t2] "=&r" (t2)
  315. : [a] "r" (x), [b] "r" (y),
  316. [c] "Mr" ((z) + 1), [d] "Mr" (31 - (z)));
  317. return t;
  318. }
  319. #endif
  320. //get Q from PSR
  321. static inline uint32_t get_q_psr(void) __attribute__((always_inline, unused));
  322. static inline uint32_t get_q_psr(void)
  323. {
  324. uint32_t out;
  325. asm ("mrs %0, APSR" : "=r" (out));
  326. return (out & 0x8000000)>>27;
  327. }
  328. //clear Q BIT in PSR
  329. static inline void clr_q_psr(void) __attribute__((always_inline, unused));
  330. static inline void clr_q_psr(void)
  331. {
  332. uint32_t t;
  333. asm ("mov %[t],#0\n"
  334. "msr APSR_nzcvq,%0\n" : [t] "=&r" (t)::"cc");
  335. }
  336. #endif