You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

158 lines
6.3KB

  1. /* Audio Library for Teensy 3.X
  2. * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
  3. *
  4. * Development of this audio library was funded by PJRC.COM, LLC by sales of
  5. * Teensy and Audio Adaptor boards. Please support PJRC's efforts to develop
  6. * open source software by purchasing Teensy or other PJRC products.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice, development funding notice, and this permission
  16. * notice shall be included in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #ifndef dspinst_h_
  27. #define dspinst_h_
  28. #include <stdint.h>
  // Shifts val right by rshift (arithmetic shift), then saturates the result
  // to the range of a signed `bits`-bit integer:
  //   [-2**(bits-1), 2**(bits-1) - 1]
  // Expected ranges: bits 1..32, rshift 0..31.  On the ARM path both are
  // passed as "I" (immediate) operands, so they must be compile-time
  // constants at every (inlined) call site.
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
      return out;
  #else
      // Portable fallback for targets without SSAT.  Assumes >> on a negative
      // value is an arithmetic shift (true for GCC and Clang).
      // The bound is built in unsigned arithmetic so bits == 32 cannot overflow.
      const int32_t upper = (int32_t)((UINT32_C(1) << (bits - 1)) - 1u);
      const int32_t lower = -upper - 1;
      const int32_t shifted = val >> rshift;
      if (shifted > upper) {
          return upper;
      }
      if (shifted < lower) {
          return lower;
      }
      return shifted;
  #endif
  }
  // computes ((a[31:0] * b[15:0]) >> 16)
  // Both operands are treated as signed; only the bottom halfword of b is
  // used.  The result is bits [47:16] of the 48-bit product, i.e. the shift
  // rounds toward negative infinity.
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMULWB.  Assumes >> on a
      // negative value is an arithmetic shift (true for GCC and Clang).
      const int16_t b_low = (int16_t)(uint16_t)b;
      const int64_t product = (int64_t)a * b_low;
      return (int32_t)(product >> 16);
  #endif
  }
  // computes ((a[31:0] * b[31:16]) >> 16)
  // Both operands are treated as signed; only the top halfword of b is used.
  // The result is bits [47:16] of the 48-bit product, i.e. the shift rounds
  // toward negative infinity.
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMULWT.  Assumes >> on a
      // negative value is an arithmetic shift (true for GCC and Clang).
      const int16_t b_high = (int16_t)(uint16_t)(b >> 16);
      const int64_t product = (int64_t)a * b_high;
      return (int32_t)(product >> 16);
  #endif
  }
  // computes (((int64_t)a[31:0] * (int64_t)b[31:0]) >> 32)
  // Signed 32x32 multiply returning the upper 32 bits of the 64-bit product
  // (truncating: the low 32 bits are discarded without rounding).
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMUL.  Assumes >> on a
      // negative value is an arithmetic shift (true for GCC and Clang).
      const int64_t product = (int64_t)a * (int64_t)b;
      return (int32_t)(product >> 32);
  #endif
  }
  // computes (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
  // Signed 32x32 multiply returning the upper 32 bits of the 64-bit product,
  // rounded by adding half an output LSB (2**31) before the shift.
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMULR.  |a*b| <= 2**62, so
      // adding 2**31 cannot overflow int64_t.  Assumes >> on a negative value
      // is an arithmetic shift (true for GCC and Clang).
      const int64_t product = (int64_t)a * (int64_t)b;
      return (int32_t)((product + INT64_C(0x80000000)) >> 32);
  #endif
  }
  // computes sum + (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
  // The instruction evaluates (((int64_t)sum << 32) + a*b + 0x80000000) and
  // returns bits [63:32]; the 32-bit result wraps on overflow.
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMLAR.  The accumulation is
      // done modulo 2**64 in unsigned arithmetic so that overflow wraps
      // instead of being undefined behavior.
      const uint64_t product = (uint64_t)((int64_t)a * (int64_t)b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) + product + UINT64_C(0x80000000);
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // computes ((((int64_t)sum << 32) - (int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
  // i.e. approximately sum - ((a * b) >> 32), with the rounding constant
  // applied to the 64-bit difference rather than to the product.  This differs
  // from "sum - rounded(product)" when the low 32 bits of a*b are exactly
  // 0x80000000.  The 32-bit result wraps on overflow.
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMLSR.  The arithmetic is done
      // modulo 2**64 in unsigned arithmetic so that overflow wraps instead of
      // being undefined behavior.
      const uint64_t product = (uint64_t)((int64_t)a * (int64_t)b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) - product + UINT64_C(0x80000000);
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // computes ((a[15:0] << 16) | b[15:0])
  // Packs the bottom halfword of a into the top half of the result and the
  // bottom halfword of b into the bottom half.  The upper halves of both
  // inputs are ignored.
  static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16x16(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      uint32_t out;
      // Operand order: %1 = b supplies bits [15:0], %2 = a (shifted) bits [31:16].
      __asm__ volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
      return out;
  #else
      // Portable fallback for targets without PKHBT.
      return ((uint32_t)a << 16) | ((uint32_t)b & UINT32_C(0xFFFF));
  #endif
  }
  // computes ((sat16(a[31:16] + b[31:16]) << 16) | sat16(a[15:0] + b[15:0]))
  // Two independent signed 16-bit additions, one per halfword.  Each sum is
  // saturated to [-32768, 32767] rather than wrapping.
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      uint32_t out;
      __asm__ volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without QADD16.
      int32_t low = (int32_t)(int16_t)(uint16_t)a + (int32_t)(int16_t)(uint16_t)b;
      int32_t high = (int32_t)(int16_t)(uint16_t)(a >> 16) + (int32_t)(int16_t)(uint16_t)(b >> 16);
      if (low > INT16_MAX) {
          low = INT16_MAX;
      } else if (low < INT16_MIN) {
          low = INT16_MIN;
      }
      if (high > INT16_MAX) {
          high = INT16_MAX;
      } else if (high < INT16_MIN) {
          high = INT16_MIN;
      }
      return ((uint32_t)(uint16_t)high << 16) | (uint32_t)(uint16_t)low;
  #endif
  }
  // computes (sum + ((a[31:0] * b[15:0]) >> 16))
  // Both multiplicands are treated as signed; only the bottom halfword of b is
  // used.  The product term is bits [47:16] of the 48-bit product, and the
  // final 32-bit addition wraps on overflow.
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMLAWB.  Assumes >> on a
      // negative value is an arithmetic shift (true for GCC and Clang).
      // The addition is done in unsigned arithmetic so overflow wraps.
      const int16_t b_low = (int16_t)(uint16_t)b;
      const int32_t scaled = (int32_t)(((int64_t)a * b_low) >> 16);
      return (int32_t)((uint32_t)sum + (uint32_t)scaled);
  #endif
  }
  // computes (sum + ((a[31:0] * b[31:16]) >> 16))
  // Both multiplicands are treated as signed; only the top halfword of b is
  // used.  The product term is bits [47:16] of the 48-bit product, and the
  // final 32-bit addition wraps on overflow.
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMLAWT.  Assumes >> on a
      // negative value is an arithmetic shift (true for GCC and Clang).
      // The addition is done in unsigned arithmetic so overflow wraps.
      const int16_t b_high = (int16_t)(uint16_t)(b >> 16);
      const int32_t scaled = (int32_t)(((int64_t)a * b_high) >> 16);
      return (int32_t)((uint32_t)sum + (uint32_t)scaled);
  #endif
  }
  // computes (a & b)
  // On ARM the operation is emitted as an explicit register-register AND via
  // inline asm, forcing the compiler to allocate a register for b and use a
  // single-cycle instruction.  Other targets use the plain C operator.
  static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t logical_and(uint32_t a, uint32_t b)
  {
  #if defined(__arm__)
      // "+r": a is both an input and the destination register.
      __asm__ volatile("and %0, %1" : "+r" (a) : "r" (b));
      return a;
  #else
      return a & b;
  #endif
  }
  // computes ((a[15:0] * b[15:0]) + (a[31:16] * b[31:16]))
  // Each halfword is treated as a signed 16-bit value.  The 32-bit sum wraps
  // in the single overflowing case where all four halfwords are 0x8000.
  static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMUAD.  The final addition is
      // done in unsigned arithmetic so the overflow case wraps.
      const int32_t bottoms = (int32_t)(int16_t)(uint16_t)a * (int32_t)(int16_t)(uint16_t)b;
      const int32_t tops = (int32_t)(int16_t)(uint16_t)(a >> 16) * (int32_t)(int16_t)(uint16_t)(b >> 16);
      return (int32_t)((uint32_t)bottoms + (uint32_t)tops);
  #endif
  }
  // computes ((a[15:0] * b[31:16]) + (a[31:16] * b[15:0]))
  // Each halfword is treated as a signed 16-bit value.  The 32-bit sum wraps
  // in the single overflowing case where all four halfwords are 0x8000.
  static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMUADX.  The final addition is
      // done in unsigned arithmetic so the overflow case wraps.
      const int32_t bottom_by_top = (int32_t)(int16_t)(uint16_t)a * (int32_t)(int16_t)(uint16_t)(b >> 16);
      const int32_t top_by_bottom = (int32_t)(int16_t)(uint16_t)(a >> 16) * (int32_t)(int16_t)(uint16_t)b;
      return (int32_t)((uint32_t)bottom_by_top + (uint32_t)top_by_bottom);
  #endif
  }
  140. #endif