You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

114 lines
4.3KB

  1. #ifndef dspinst_h_
  2. #define dspinst_h_
  3. #include <stdint.h>
  // Shifts `val` right arithmetically by `rshift` bits, then saturates the
  // result to the signed `bits`-bit range [-2^(bits-1), 2^(bits-1) - 1].
  //
  // val    : value to shift and clamp
  // bits   : width of the saturation range, 1..32
  // rshift : right-shift amount (SSAT's Thumb encoding takes ASR #1..#31 --
  //          TODO confirm against the ARM reference if 0 or 32 is needed)
  // returns: the clamped value
  //
  // On ARM cores with the DSP/SIMD instructions this is one SSAT instruction;
  // its "I" operand constraints mean `bits` and `rshift` must be immediate
  // (compile-time constant) values at the inlined call site.  Every other
  // target gets an equivalent C implementation so the header also builds and
  // can be unit-tested off-target.
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      // __asm__/__volatile__ spellings also work in strict ISO modes (-std=c11).
      __asm__ __volatile__("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
      return out;
  #else
      // Built in unsigned arithmetic so that bits == 32 does not overflow.
      const int32_t hi = (int32_t)(((uint32_t)1 << (bits - 1)) - 1u);
      const int32_t lo = -hi - 1;
      // Relies on >> of a negative value being an arithmetic shift, which is
      // what GCC and Clang document.
      const int32_t shifted = val >> rshift;
      if (shifted > hi) {
          return hi;
      }
      if (shifted < lo) {
          return lo;
      }
      return shifted;
  #endif
  }
  // Multiplies the signed 32-bit `a` by the signed low halfword of `b`
  // (b[15:0], sign-extended) and returns bits [47:16] of the 48-bit product,
  // i.e. (a * (int16_t)b[15:0]) >> 16.  The upper halfword of `b` is ignored.
  //
  // Uses the SMULWB instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Narrowing casts rely on two's-complement wrap-around (GCC/Clang behaviour).
      const int64_t product = (int64_t)a * (int16_t)(uint16_t)b;
      // Shift the bit pattern as unsigned, then keep the low 32 bits.
      return (int32_t)(uint32_t)((uint64_t)product >> 16);
  #endif
  }
  // Multiplies the signed 32-bit `a` by the signed high halfword of `b`
  // (b[31:16], sign-extended) and returns bits [47:16] of the 48-bit product,
  // i.e. (a * (int16_t)b[31:16]) >> 16.  The lower halfword of `b` is ignored.
  //
  // Uses the SMULWT instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Narrowing casts rely on two's-complement wrap-around (GCC/Clang behaviour).
      const int64_t product = (int64_t)a * (int16_t)(uint16_t)(b >> 16);
      // Shift the bit pattern as unsigned, then keep the low 32 bits.
      return (int32_t)(uint32_t)((uint64_t)product >> 16);
  #endif
  }
  // Returns the upper 32 bits of the signed 64-bit product a * b,
  // i.e. ((int64_t)a * (int64_t)b) >> 32 (truncating, no rounding).
  //
  // Uses the SMMUL instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      const int64_t product = (int64_t)a * b;
      // Take the high word of the two's-complement bit pattern; shifting as
      // unsigned avoids depending on how signed >> treats negative values.
      return (int32_t)(uint32_t)((uint64_t)product >> 32);
  #endif
  }
  // Returns the upper 32 bits of the signed 64-bit product a * b, rounded to
  // nearest: ((int64_t)a * (int64_t)b + 0x80000000) >> 32.
  // The rounding constant is 2^31 (0x80000000), half of the discarded word.
  //
  // Uses the SMMULR instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Work on the two's-complement bit pattern in unsigned 64-bit maths so
      // the addition cannot trigger signed-overflow undefined behaviour.
      const uint64_t product = (uint64_t)((int64_t)a * b);
      return (int32_t)(uint32_t)((product + (uint64_t)0x80000000u) >> 32);
  #endif
  }
  // Returns sum + (((int64_t)a * (int64_t)b + 0x80000000) >> 32), i.e. the
  // rounded upper word of a * b added to `sum`.  The result is the upper
  // 32 bits of the 64-bit accumulator (sum << 32) + a * b + 0x80000000, so
  // it wraps around on 32-bit overflow rather than saturating.
  //
  // Uses the SMMLAR instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Unsigned 64-bit maths wraps modulo 2^64, so no step can hit
      // signed-overflow undefined behaviour.
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32)
                         + (uint64_t)((int64_t)a * b)
                         + (uint64_t)0x80000000u;
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Returns the upper 32 bits of the 64-bit value
  //     ((int64_t)sum << 32) - (int64_t)a * (int64_t)b + 0x80000000
  // i.e. `sum` minus the upper word of a * b, with rounding applied to the
  // difference.
  //
  // Note: rounding happens AFTER the subtraction, so this is not always equal
  // to sum - ((a * b + 0x80000000) >> 32).  When the low word of a * b is
  // exactly 0x80000000 the two differ by one (e.g. sum=10, a=0x40000000, b=2
  // gives 10 here).  The result wraps around on 32-bit overflow.
  //
  // Uses the SMMLSR instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Unsigned 64-bit maths wraps modulo 2^64, so no step can hit
      // signed-overflow undefined behaviour.
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32)
                         - (uint64_t)((int64_t)a * b)
                         + (uint64_t)0x80000000u;
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Packs two halfwords into one word: (a[15:0] << 16) | b[15:0].
  // The upper halfwords of both inputs are discarded.
  //
  // Uses the PKHBT instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16x16(int32_t a, int32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      // Declared with the return type so no implicit sign conversion happens.
      uint32_t out;
      // PKHBT keeps the bottom half of its first source (b) and the top half
      // of the shifted second source (a << 16).
      __asm__ __volatile__("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
      return out;
  #else
      return ((uint32_t)a << 16) | ((uint32_t)b & 0xFFFFu);
  #endif
  }
  // Adds two packed pairs of signed 16-bit values lane by lane:
  //     (sat16(a[31:16] + b[31:16]) << 16) | sat16(a[15:0] + b[15:0])
  // Each halfword sum is saturated to [-32768, 32767] independently, so a
  // lane that overflows clamps instead of wrapping or carrying into the
  // other lane.
  //
  // Uses the QADD16 instruction on ARM cores that provide it and an
  // equivalent C implementation everywhere else.
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      uint32_t out;
      __asm__ __volatile__("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      uint32_t packed = 0;
      // Process the low lane (shift 0) and then the high lane (shift 16).
      for (int shift = 0; shift <= 16; shift += 16) {
          // Narrowing casts rely on two's-complement wrap-around (GCC/Clang behaviour).
          int32_t lane = (int16_t)(uint16_t)(a >> shift) + (int16_t)(uint16_t)(b >> shift);
          if (lane > 32767) {
              lane = 32767;
          } else if (lane < -32768) {
              lane = -32768;
          }
          packed |= ((uint32_t)lane & 0xFFFFu) << shift;
      }
      return packed;
  #endif
  }
  // Returns sum + ((a * (int16_t)b[15:0]) >> 16): the signed 32-bit `a` is
  // multiplied by the sign-extended low halfword of `b`, bits [47:16] of the
  // product are added to `sum`, and the low 32 bits of that sum are returned
  // (the addition wraps around on overflow; it does not saturate).
  //
  // Uses the SMLAWB instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Narrowing casts rely on two's-complement wrap-around (GCC/Clang behaviour).
      const int64_t product = (int64_t)a * (int16_t)(uint16_t)b;
      const uint32_t scaled = (uint32_t)((uint64_t)product >> 16);
      // Add in unsigned arithmetic so an overflowing sum wraps instead of
      // being undefined behaviour.
      return (int32_t)((uint32_t)sum + scaled);
  #endif
  }
  // Returns sum + ((a * (int16_t)b[31:16]) >> 16): the signed 32-bit `a` is
  // multiplied by the sign-extended high halfword of `b`, bits [47:16] of the
  // product are added to `sum`, and the low 32 bits of that sum are returned
  // (the addition wraps around on overflow; it does not saturate).
  //
  // Uses the SMLAWT instruction on ARM cores that provide it and an
  // equivalent C expression everywhere else.
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      int32_t out;
      __asm__ __volatile__("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Narrowing casts rely on two's-complement wrap-around (GCC/Clang behaviour).
      const int64_t product = (int64_t)a * (int16_t)(uint16_t)(b >> 16);
      const uint32_t scaled = (uint32_t)((uint64_t)product >> 16);
      // Add in unsigned arithmetic so an overflowing sum wraps instead of
      // being undefined behaviour.
      return (int32_t)((uint32_t)sum + scaled);
  #endif
  }
  // Returns the bitwise AND of `a` and `b`.
  //
  // On ARM the operation is written as an explicit AND instruction in inline
  // assembly; per the original author's note this is meant to force the
  // compiler to allocate a register and use a single-cycle instruction.
  // Other targets use the plain C operator, which yields the same value.
  static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t logical_and(uint32_t a, uint32_t b)
  {
  #if defined(__arm__) && (defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32))
      // "+r" makes `a` both an input and the destination register.
      __asm__ __volatile__("and %0, %1" : "+r" (a) : "r" (b));
      return a;
  #else
      return a & b;
  #endif
  }
  99. #endif