|
-
-
- #ifndef dspinst_h_
- #define dspinst_h_
-
- #include <stdint.h>
-
-
/*
 * Arithmetic-shift val right by rshift, then saturate the result to a
 * signed integer of `bits` bits, i.e. clamp to [-2^(bits-1), 2^(bits-1)-1].
 *
 * On targets with the ARM DSP extension this is a single SSAT instruction;
 * there, bits and rshift must be compile-time constants (the "I" asm
 * constraint requires immediates).  Elsewhere a portable C fallback is used.
 *
 * Precondition: 1 <= bits <= 32 and rshift >= 0.
 */
static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
    return out;
#else
    /* 64-bit limit so that bits == 32 does not overflow the shift. */
    int64_t limit = (int64_t)1 << (bits - 1);
    /* Shifting by 31 already yields 0 or -1; larger counts would be UB in C.
     * Relies on arithmetic right shift of negative values (GCC/Clang). */
    int32_t shifted = val >> (rshift > 31 ? 31 : rshift);

    if (shifted > limit - 1) {
        return (int32_t)(limit - 1);
    }
    if (shifted < -limit) {
        return (int32_t)(-limit);
    }
    return shifted;
#endif
}
-
-
/*
 * Multiply the signed 32-bit value a by the signed low halfword of b and
 * return the upper 32 bits of the 48-bit product:
 *     (a * (int16_t)b[15:0]) >> 16
 *
 * Uses SMULWB when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Widen to 64 bits so the 48-bit product cannot overflow.
     * Relies on arithmetic right shift of negative values (GCC/Clang). */
    int64_t product = (int64_t)a * (int16_t)(b & 0xFFFFu);
    return (int32_t)(product >> 16);
#endif
}
-
-
/*
 * Multiply the signed 32-bit value a by the signed high halfword of b and
 * return the upper 32 bits of the 48-bit product:
 *     (a * (int16_t)b[31:16]) >> 16
 *
 * Uses SMULWT when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Widen to 64 bits so the 48-bit product cannot overflow.
     * Relies on arithmetic right shift of negative values (GCC/Clang). */
    int64_t product = (int64_t)a * (int16_t)(b >> 16);
    return (int32_t)(product >> 16);
#endif
}
-
-
/*
 * Signed 32x32 multiply returning the upper 32 bits of the 64-bit product
 * (truncated, no rounding):  (a * b) >> 32
 *
 * Uses SMMUL when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Relies on arithmetic right shift of negative values (GCC/Clang). */
    int64_t product = (int64_t)a * (int64_t)b;
    return (int32_t)(product >> 32);
#endif
}
-
-
/*
 * Signed 32x32 multiply returning the upper 32 bits of the 64-bit product,
 * rounded to nearest:  (a * b + 0x80000000) >> 32
 *
 * Uses SMMULR when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* |a * b| <= 2^62, so adding the 2^31 rounding constant cannot overflow.
     * Relies on arithmetic right shift of negative values (GCC/Clang). */
    int64_t product = (int64_t)a * (int64_t)b + (int64_t)0x80000000LL;
    return (int32_t)(product >> 32);
#endif
}
-
-
/*
 * Rounded multiply-accumulate on the upper word:
 *     ((sum << 32) + a * b + 0x80000000) >> 32
 * i.e. sum plus the rounded high 32 bits of the signed product a * b.
 * The 64-bit intermediate wraps on overflow.
 *
 * Uses SMMLAR when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Do the 64-bit accumulation in unsigned arithmetic so wrap-around is
     * well defined instead of signed-overflow UB. */
    uint64_t acc = (uint64_t)(uint32_t)sum << 32;
    acc += (uint64_t)((int64_t)a * (int64_t)b);
    acc += 0x80000000u;
    return (int32_t)(uint32_t)(acc >> 32);
#endif
}
-
-
/*
 * Rounded multiply-subtract on the upper word:
 *     ((sum << 32) - a * b + 0x80000000) >> 32
 * i.e. sum minus the high 32 bits of the signed product a * b, with
 * rounding applied to the 64-bit difference.  The 64-bit intermediate
 * wraps on overflow.
 *
 * Uses SMMLSR when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Do the 64-bit arithmetic in unsigned so wrap-around is well defined
     * instead of signed-overflow UB. */
    uint64_t acc = (uint64_t)(uint32_t)sum << 32;
    acc -= (uint64_t)((int64_t)a * (int64_t)b);
    acc += 0x80000000u;
    return (int32_t)(uint32_t)(acc >> 32);
#endif
}
-
-
-
/*
 * Pack two high halfwords into one word:
 *     result[31:16] = a[31:16],  result[15:0] = b[31:16]
 *
 * Uses PKHTB with "asr #16" when the ARM DSP extension is available,
 * otherwise an equivalent portable C fallback.
 */
static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    uint32_t out;
    __asm__ volatile("pkhtb %0, %1, %2, asr #16" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Work on unsigned copies so the shift is a plain logical shift. */
    return ((uint32_t)a & 0xFFFF0000u) | ((uint32_t)b >> 16);
#endif
}
-
-
/*
 * Pack the high halfword of a with the low halfword of b:
 *     result[31:16] = a[31:16],  result[15:0] = b[15:0]
 *
 * Uses PKHTB (no shift) when the ARM DSP extension is available,
 * otherwise a portable C fallback.
 * NOTE(review): the fallback assumes the assembler treats a shift-less
 * PKHTB as "top from first operand, bottom from second" — confirm against
 * the toolchain in use.
 */
static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    uint32_t out;
    __asm__ volatile("pkhtb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    return ((uint32_t)a & 0xFFFF0000u) | ((uint32_t)b & 0x0000FFFFu);
#endif
}
-
-
/*
 * Pack two low halfwords into one word:
 *     result[31:16] = a[15:0],  result[15:0] = b[15:0]
 *
 * Uses PKHBT with "lsl #16" (operands passed as b, a) when the ARM DSP
 * extension is available, otherwise an equivalent portable C fallback.
 */
static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    uint32_t out;
    __asm__ volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
    return out;
#else
    /* Shift an unsigned copy: left-shifting a negative int would be UB. */
    return ((uint32_t)a << 16) | ((uint32_t)b & 0x0000FFFFu);
#endif
}
-
-
/*
 * Pack two 16-bit values into one word:
 *     result[31:16] = a[15:0],  result[15:0] = b[15:0]
 * The upper halfwords of both inputs are discarded.
 *
 * Uses PKHBT with "lsl #16" (operands passed as b, a) when the ARM DSP
 * extension is available, otherwise an equivalent portable C fallback.
 */
static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16x16(int32_t a, int32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    uint32_t out;
    __asm__ volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
    return out;
#else
    /* Shift an unsigned copy: left-shifting a negative int would be UB. */
    return ((uint32_t)a << 16) | ((uint32_t)b & 0x0000FFFFu);
#endif
}
-
-
/*
 * Add two pairs of packed signed 16-bit values with saturation.  Each
 * halfword of a is added to the matching halfword of b and the sum is
 * clamped to [-32768, 32767]:
 *     result[31:16] = sat16(a[31:16] + b[31:16])
 *     result[15:0]  = sat16(a[15:0]  + b[15:0])
 *
 * Uses QADD16 when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    uint32_t out;
    __asm__ volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Sign-extend each lane to 32 bits so the raw sums cannot overflow. */
    int32_t low = (int16_t)(a & 0xFFFFu) + (int16_t)(b & 0xFFFFu);
    int32_t high = (int16_t)(a >> 16) + (int16_t)(b >> 16);

    if (low > 32767) {
        low = 32767;
    } else if (low < -32768) {
        low = -32768;
    }
    if (high > 32767) {
        high = 32767;
    } else if (high < -32768) {
        high = -32768;
    }
    return ((uint32_t)high << 16) | ((uint32_t)low & 0xFFFFu);
#endif
}
-
-
/*
 * Multiply-accumulate with a 32x16 product:
 *     sum + ((a * (int16_t)b[15:0]) >> 16)
 * The final 32-bit addition wraps on overflow (no saturation).
 *
 * Uses SMLAWB when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Relies on arithmetic right shift of negative values (GCC/Clang). */
    int64_t product = (int64_t)a * (int16_t)(b & 0xFFFFu);
    int32_t scaled = (int32_t)(product >> 16);
    /* Add as unsigned so overflow wraps instead of being UB. */
    return (int32_t)((uint32_t)sum + (uint32_t)scaled);
#endif
}
-
-
/*
 * Multiply-accumulate with a 32x16 product:
 *     sum + ((a * (int16_t)b[31:16]) >> 16)
 * The final 32-bit addition wraps on overflow (no saturation).
 *
 * Uses SMLAWT when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Relies on arithmetic right shift of negative values (GCC/Clang). */
    int64_t product = (int64_t)a * (int16_t)(b >> 16);
    int32_t scaled = (int32_t)(product >> 16);
    /* Add as unsigned so overflow wraps instead of being UB. */
    return (int32_t)((uint32_t)sum + (uint32_t)scaled);
#endif
}
-
-
/*
 * Bitwise AND of a and b.
 *
 * On targets with the ARM DSP extension this is emitted as an explicit
 * AND instruction via inline asm; elsewhere the C operator is used.
 */
static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline uint32_t logical_and(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    /* "+r": a is both the first source operand and the destination. */
    __asm__ volatile("and %0, %1" : "+r" (a) : "r" (b));
    return a;
#else
    return a & b;
#endif
}
-
-
/*
 * Dual signed 16x16 multiply with add:
 *     (a[31:16] * b[31:16]) + (a[15:0] * b[15:0])
 * Halfwords are treated as signed; the 32-bit sum wraps on overflow.
 *
 * Uses SMUAD when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    int32_t top = (int16_t)(a >> 16) * (int16_t)(b >> 16);
    int32_t bottom = (int16_t)(a & 0xFFFFu) * (int16_t)(b & 0xFFFFu);
    /* Only 0x80008000 * 0x80008000 overflows; add unsigned so it wraps. */
    return (int32_t)((uint32_t)top + (uint32_t)bottom);
#endif
}
-
-
/*
 * Dual signed 16x16 multiply with add, halfwords of b exchanged:
 *     (a[31:16] * b[15:0]) + (a[15:0] * b[31:16])
 * Halfwords are treated as signed; the 32-bit sum wraps on overflow.
 *
 * Uses SMUADX when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    int32_t top_by_bottom = (int16_t)(a >> 16) * (int16_t)(b & 0xFFFFu);
    int32_t bottom_by_top = (int16_t)(a & 0xFFFFu) * (int16_t)(b >> 16);
    /* Only 0x80008000 * 0x80008000 overflows; add unsigned so it wraps. */
    return (int32_t)((uint32_t)top_by_bottom + (uint32_t)bottom_by_top);
#endif
}
-
-
/*
 * Signed 16x16 multiply of the low halfwords:  a[15:0] * b[15:0]
 * The upper halfwords of both inputs are ignored.
 *
 * Uses SMULBB when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smulbb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    int16_t lhs = (int16_t)(a & 0xFFFFu);
    int16_t rhs = (int16_t)(b & 0xFFFFu);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Signed 16x16 multiply of the low halfword of a by the high halfword
 * of b:  a[15:0] * b[31:16]
 *
 * Uses SMULBT when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smulbt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    int16_t lhs = (int16_t)(a & 0xFFFFu);
    int16_t rhs = (int16_t)(b >> 16);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Signed 16x16 multiply of the high halfword of a by the low halfword
 * of b:  a[31:16] * b[15:0]
 *
 * Uses SMULTB when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smultb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    int16_t lhs = (int16_t)(a >> 16);
    int16_t rhs = (int16_t)(b & 0xFFFFu);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Signed 16x16 multiply of the high halfwords:  a[31:16] * b[31:16]
 * The lower halfwords of both inputs are ignored.
 *
 * Uses SMULTT when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.
 */
static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("smultt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    int16_t lhs = (int16_t)(a >> 16);
    int16_t rhs = (int16_t)(b >> 16);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Saturating signed 32-bit subtraction: a - b, with both operands
 * interpreted as signed and the result clamped to [INT32_MIN, INT32_MAX].
 *
 * Uses QSUB when the ARM DSP extension is available, otherwise an
 * equivalent portable C fallback.  (The spelling of the function name is
 * kept as-is for existing callers.)
 */
static inline int32_t substract_32_saturate(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t substract_32_saturate(uint32_t a, uint32_t b)
{
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_ARCH_7EM__)
    int32_t out;
    __asm__ volatile("qsub %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Subtract in 64 bits so the exact difference is available to clamp. */
    int64_t diff = (int64_t)(int32_t)a - (int64_t)(int32_t)b;

    if (diff > INT32_MAX) {
        return INT32_MAX;
    }
    if (diff < INT32_MIN) {
        return INT32_MIN;
    }
    return (int32_t)diff;
#endif
}
-
-
-
- #endif
|