Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

112 rindas
4.2KB

  1. #include <stdint.h>
  // Arithmetic-shifts val right by rshift, then saturates the result to a signed
  // value that is `bits` wide, i.e. clamps it to [-2**(bits-1), 2**(bits-1) - 1].
  //
  //   val    - signed 32-bit input
  //   bits   - saturation width, 1..32
  //   rshift - right shift applied before saturating (1..31 for the ASR form)
  //
  // On the inline-asm path bits and rshift are instruction immediates ("I"
  // constraint), so they must be compile-time constants at the call site.
  // Returns the shifted and clamped value.
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
  static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_ARCH_7M__) || defined(__ARM_FEATURE_SAT)
      int32_t out;
      __asm__ volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
      return out;
  #else
      // Portable fallback for targets without SSAT. Relies on >> of a negative
      // signed value being an arithmetic shift, which GNU C guarantees.
      const int32_t upper = (int32_t)((UINT32_C(1) << (bits - 1)) - 1u);
      const int32_t lower = -upper - 1;
      const int32_t shifted = val >> rshift;
      if (shifted > upper) {
          return upper;
      }
      if (shifted < lower) {
          return lower;
      }
      return shifted;
  #endif
  }
  // Multiplies the signed 32-bit a by the signed bottom halfword of b and
  // returns bits [47:16] of the 48-bit product:
  //   (a[31:0] * (int16_t)b[15:0]) >> 16
  //
  //   a - signed 32-bit multiplicand
  //   b - packed word; only b[15:0] is used, interpreted as signed
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
      int32_t out;
      __asm__ volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMULWB. Relies on GNU C semantics:
      // narrowing to int16_t wraps and >> on a negative value is arithmetic.
      const int16_t bottom = (int16_t)(b & 0xFFFFu);
      return (int32_t)(((int64_t)a * bottom) >> 16);
  #endif
  }
  // Multiplies the signed 32-bit a by the signed top halfword of b and returns
  // bits [47:16] of the 48-bit product:
  //   (a[31:0] * (int16_t)b[31:16]) >> 16
  //
  //   a - signed 32-bit multiplicand
  //   b - packed word; only b[31:16] is used, interpreted as signed
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
      int32_t out;
      __asm__ volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMULWT. Relies on GNU C semantics:
      // narrowing to int16_t wraps and >> on a negative value is arithmetic.
      const int16_t top = (int16_t)(b >> 16);
      return (int32_t)(((int64_t)a * top) >> 16);
  #endif
  }
  // Signed 32x32 multiply that keeps only the most significant word of the
  // 64-bit product (truncating, no rounding):
  //   ((int64_t)a * (int64_t)b) >> 32
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMUL. Relies on >> of a negative
      // signed value being an arithmetic shift, which GNU C guarantees.
      const int64_t product = (int64_t)a * (int64_t)b;
      return (int32_t)(product >> 32);
  #endif
  }
  // Signed 32x32 multiply that returns the most significant word of the 64-bit
  // product, rounded by adding half of the discarded range first:
  //   ((int64_t)a * (int64_t)b + 0x80000000) >> 32
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMULR. |a*b| <= 2**62, so adding
      // the rounding constant cannot overflow int64_t. Relies on >> of a
      // negative signed value being an arithmetic shift (GNU C).
      const int64_t rounded = (int64_t)a * (int64_t)b + INT64_C(0x80000000);
      return (int32_t)(rounded >> 32);
  #endif
  }
  // Rounded 32x32 multiply-accumulate on the most significant word:
  //   sum + (((int64_t)a * (int64_t)b + 0x80000000) >> 32)
  // The addition wraps modulo 2**32 if the result does not fit in int32_t.
  //
  //   sum - accumulator, in units of the product's upper word
  //   a, b - signed 32-bit factors
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMLAR. The 64-bit accumulation is
      // done in unsigned arithmetic so that it wraps like the instruction does
      // instead of hitting signed-overflow undefined behaviour.
      const uint64_t product = (uint64_t)((int64_t)a * (int64_t)b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) + product + UINT64_C(0x80000000);
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Rounded 32x32 multiply-subtract on the most significant word:
  //   (((int64_t)sum << 32) - (int64_t)a * (int64_t)b + 0x80000000) >> 32
  // This equals sum - (((int64_t)a * b + 0x80000000) >> 32) except when the low
  // word of a*b is exactly 0x80000000, where the rounding constant is applied
  // after the subtraction and the result is one higher. The result wraps
  // modulo 2**32 if it does not fit in int32_t.
  //
  //   sum - accumulator, in units of the product's upper word
  //   a, b - signed 32-bit factors
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      int32_t out;
      __asm__ volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMMLSR. Unsigned 64-bit arithmetic
      // gives the same modulo-2**64 result as the instruction without invoking
      // signed-overflow undefined behaviour.
      const uint64_t product = (uint64_t)((int64_t)a * (int64_t)b);
      const uint64_t acc = ((uint64_t)(uint32_t)sum << 32) - product + UINT64_C(0x80000000);
      return (int32_t)(uint32_t)(acc >> 32);
  #endif
  }
  // Packs two halfwords into one word: the low 16 bits of a become the top
  // halfword and the low 16 bits of b become the bottom halfword:
  //   (a[15:0] << 16) | b[15:0]
  // The upper 16 bits of both arguments are ignored.
  static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
  static inline uint32_t pack_16x16(int32_t a, int32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      uint32_t out;
      // Operand order is (b, a): PKHBT takes the bottom halfword from its first
      // source and the top halfword from the shifted second source.
      __asm__ volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
      return out;
  #else
      // Portable fallback for targets without PKHBT; done in unsigned arithmetic
      // so negative inputs are shifted and masked without undefined behaviour.
      const uint32_t top = (uint32_t)a << 16;
      const uint32_t bottom = (uint32_t)b & 0xFFFFu;
      return top | bottom;
  #endif
  }
  // Adds two packed pairs of signed 16-bit values lane by lane:
  //   (sat16(a[31:16] + b[31:16]) << 16) | sat16(a[15:0] + b[15:0])
  // Each lane saturates independently to [-32768, 32767] instead of wrapping,
  // and no carry propagates between the lanes.
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_SIMD32)
      uint32_t out;
      __asm__ volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without QADD16. Narrowing to int16_t
      // relies on GNU C's wrapping conversion to recover each lane's sign.
      int32_t low = (int32_t)(int16_t)(a & 0xFFFFu) + (int32_t)(int16_t)(b & 0xFFFFu);
      int32_t high = (int32_t)(int16_t)(a >> 16) + (int32_t)(int16_t)(b >> 16);
      if (low > INT16_MAX) {
          low = INT16_MAX;
      } else if (low < INT16_MIN) {
          low = INT16_MIN;
      }
      if (high > INT16_MAX) {
          high = INT16_MAX;
      } else if (high < INT16_MIN) {
          high = INT16_MIN;
      }
      return ((uint32_t)high << 16) | ((uint32_t)low & 0xFFFFu);
  #endif
  }
  // Multiply-accumulate with the signed bottom halfword of b:
  //   sum + ((a[31:0] * (int16_t)b[15:0]) >> 16)
  // The addition wraps modulo 2**32 if the result does not fit in int32_t.
  //
  //   sum - accumulator
  //   a   - signed 32-bit multiplicand
  //   b   - packed word; only b[15:0] is used, interpreted as signed
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
      int32_t out;
      __asm__ volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMLAWB. The final add is unsigned
      // so it wraps like the instruction; narrowing conversions and >> on
      // negative values rely on GNU C semantics.
      const int64_t product = (int64_t)a * (int16_t)(b & 0xFFFFu);
      return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
  #endif
  }
  // Multiply-accumulate with the signed top halfword of b:
  //   sum + ((a[31:0] * (int16_t)b[31:16]) >> 16)
  // The addition wraps modulo 2**32 if the result does not fit in int32_t.
  //
  //   sum - accumulator
  //   a   - signed 32-bit multiplicand
  //   b   - packed word; only b[31:16] is used, interpreted as signed
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
  {
  #if defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
      int32_t out;
      __asm__ volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
      return out;
  #else
      // Portable fallback for targets without SMLAWT. The final add is unsigned
      // so it wraps like the instruction; narrowing conversions and >> on
      // negative values rely on GNU C semantics.
      const int64_t product = (int64_t)a * (int16_t)(b >> 16);
      return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
  #endif
  }
  // Returns the bitwise AND of a and b.
  // On ARM the operation is written as inline asm so that the compiler must
  // place both operands in registers and emit a register-register AND rather
  // than choosing another instruction sequence; elsewhere it is a plain `&`.
  static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
  static inline uint32_t logical_and(uint32_t a, uint32_t b)
  {
  #if defined(__arm__)
      // "+r": a is both the first source and the destination.
      __asm__ volatile("and %0, %1" : "+r" (a) : "r" (b));
      return a;
  #else
      return a & b;
  #endif
  }