|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266 |
-
-
- #ifndef dspinst_h_
- #define dspinst_h_
-
- #include <stdint.h>
-
-
/*
 * Shift `val` right by `rshift` bits (arithmetic shift), then saturate the
 * result to a signed `bits`-bit range: [-2^(bits-1), 2^(bits-1) - 1].
 *
 * On KINETISK this is a single SSAT instruction whose `bits` and `rshift`
 * operands are immediates, so both must be compile-time constants there.
 * `bits` is expected to be in 1..32.
 */
static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift) __attribute__((always_inline, unused));
static inline int32_t signed_saturate_rshift(int32_t val, int bits, int rshift)
{
#if defined(KINETISK)
    int32_t out;
    __asm__ volatile("ssat %0, %1, %2, asr %3" : "=r" (out) : "I" (bits), "r" (val), "I" (rshift));
    return out;
#else
    /* Portable path (KINETISL and every other target). The limit is kept in
     * 64 bits so that bits == 32 does not overflow a signed int shift. */
    const int64_t limit = (int64_t)1 << (bits - 1);
    int64_t shifted = val >> rshift;

    if (shifted > limit - 1) {
        shifted = limit - 1;
    } else if (shifted < -limit) {
        shifted = -limit;
    }
    return (int32_t)shifted;
#endif
}
-
-
/*
 * Multiply the signed 32-bit `a` by the signed low 16 bits of `b` and return
 * the 48-bit product shifted right by 16 (i.e. product bits [47:16]).
 */
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
{
#if defined(KINETISK)
    int32_t out;
    __asm__ volatile("smulwb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path (KINETISL and every other target), so the function
     * always returns a value. Assumes arithmetic right shift of negatives. */
    const int16_t low_half = (int16_t)(b & 0xFFFF);
    const int64_t product = (int64_t)a * low_half;
    return (int32_t)(product >> 16);
#endif
}
-
-
/*
 * Multiply the signed 32-bit `a` by the signed high 16 bits of `b` and return
 * the 48-bit product shifted right by 16 (i.e. product bits [47:16]).
 */
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
{
#if defined(KINETISK)
    int32_t out;
    __asm__ volatile("smulwt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path (KINETISL and every other target), so the function
     * always returns a value. Assumes arithmetic right shift of negatives. */
    const int16_t high_half = (int16_t)(b >> 16);
    const int64_t product = (int64_t)a * high_half;
    return (int32_t)(product >> 16);
#endif
}
-
-
/*
 * Multiply two signed 32-bit values and return the upper 32 bits of the
 * signed 64-bit product, i.e. (a * b) >> 32 without rounding.
 */
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMMUL instruction.
     * Assumes arithmetic right shift of negative values. */
    const int64_t product = (int64_t)a * (int64_t)b;
    return (int32_t)(product >> 32);
#endif
}
-
-
/*
 * Multiply two signed 32-bit values and return the upper 32 bits of the
 * 64-bit product, rounded: (a * b + 0x80000000) >> 32.
 */
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMMULR instruction. The product
     * magnitude is at most 2^62, so adding the rounding constant cannot
     * overflow int64_t. Assumes arithmetic right shift of negative values. */
    const int64_t product = (int64_t)a * (int64_t)b;
    return (int32_t)((product + INT64_C(0x80000000)) >> 32);
#endif
}
-
-
/*
 * Rounded multiply-accumulate on the high word:
 * returns bits [63:32] of ((sum << 32) + a * b + 0x80000000).
 */
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMMLAR instruction. The sum is
     * formed in unsigned 64-bit arithmetic so that overflow wraps modulo
     * 2^64 instead of being undefined. */
    uint64_t acc = (uint64_t)(uint32_t)sum << 32;
    acc += (uint64_t)((int64_t)a * (int64_t)b);
    acc += UINT64_C(0x80000000);
    return (int32_t)(uint32_t)(acc >> 32);
#endif
}
-
-
/*
 * Rounded multiply-subtract on the high word:
 * returns bits [63:32] of ((sum << 32) - a * b + 0x80000000).
 */
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMMLSR instruction. The result
     * is formed in unsigned 64-bit arithmetic so that overflow wraps modulo
     * 2^64 instead of being undefined. */
    uint64_t acc = (uint64_t)(uint32_t)sum << 32;
    acc -= (uint64_t)((int64_t)a * (int64_t)b);
    acc += UINT64_C(0x80000000);
    return (int32_t)(uint32_t)(acc >> 32);
#endif
}
-
-
-
/*
 * Build a 32-bit word from two top halves: the high 16 bits of `a` stay in
 * the high half, and the high 16 bits of `b` become the low half.
 */
static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
{
#if defined(KINETISK)
    int32_t out;
    __asm__ volatile("pkhtb %0, %1, %2, asr #16" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path (KINETISL and every other target), so the function
     * always returns a value. */
    const uint32_t high = (uint32_t)a & 0xFFFF0000u;
    const uint32_t low = (uint32_t)b >> 16;
    return high | low;
#endif
}
-
-
/*
 * Build a 32-bit word from the high 16 bits of `a` (kept in the high half)
 * and the low 16 bits of `b` (kept in the low half).
 */
static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
{
#if defined(KINETISK)
    int32_t out;
    __asm__ volatile("pkhtb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path (KINETISL and every other target), so the function
     * always returns a value. */
    const uint32_t high = (uint32_t)a & 0xFFFF0000u;
    const uint32_t low = (uint32_t)b & 0x0000FFFFu;
    return high | low;
#endif
}
-
-
/*
 * Build a 32-bit word from two bottom halves: the low 16 bits of `a` become
 * the high half, and the low 16 bits of `b` stay in the low half.
 */
static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
{
#if defined(KINETISK)
    int32_t out;
    __asm__ volatile("pkhbt %0, %1, %2, lsl #16" : "=r" (out) : "r" (b), "r" (a));
    return out;
#else
    /* Portable path (KINETISL and every other target). The shift is done on
     * an unsigned value: left-shifting a negative or large signed int is
     * undefined behavior. */
    const uint32_t high = (uint32_t)a << 16;
    const uint32_t low = (uint32_t)b & 0x0000FFFFu;
    return high | low;
#endif
}
-
-
-
-
-
/*
 * Treat `a` and `b` as pairs of signed 16-bit values and add them lane by
 * lane (high with high, low with low). Each lane saturates independently to
 * [-32768, 32767]. The two results are returned packed in one word.
 */
static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline uint32_t signed_add_16_and_16(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("qadd16 %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the QADD16 instruction. */
    int32_t low = (int16_t)(a & 0xFFFF) + (int16_t)(b & 0xFFFF);
    int32_t high = (int16_t)(a >> 16) + (int16_t)(b >> 16);

    low = (low > INT16_MAX) ? INT16_MAX : (low < INT16_MIN) ? INT16_MIN : low;
    high = (high > INT16_MAX) ? INT16_MAX : (high < INT16_MIN) ? INT16_MIN : high;

    return ((uint32_t)high << 16) | ((uint32_t)low & 0xFFFFu);
#endif
}
-
-
/*
 * Compute sum + ((a * low16(b)) >> 16), where low16(b) is the signed low
 * half of `b`. The addition wraps modulo 2^32.
 */
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16b(int32_t sum, int32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smlawb %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMLAWB instruction. The add is
     * done unsigned so that overflow wraps instead of being undefined.
     * Assumes arithmetic right shift of negative values. */
    const int64_t product = (int64_t)a * (int16_t)(b & 0xFFFF);
    return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
#endif
}
-
-
/*
 * Compute sum + ((a * high16(b)) >> 16), where high16(b) is the signed high
 * half of `b`. The addition wraps modulo 2^32.
 */
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_accumulate_32x16t(int32_t sum, int32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smlawt %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMLAWT instruction. The add is
     * done unsigned so that overflow wraps instead of being undefined.
     * Assumes arithmetic right shift of negative values. */
    const int64_t product = (int64_t)a * (int16_t)(b >> 16);
    return (int32_t)((uint32_t)sum + (uint32_t)(product >> 16));
#endif
}
-
-
/*
 * Return the bitwise AND of `a` and `b`.
 *
 * On the ARM path the operation is issued as an explicit AND instruction via
 * inline asm; elsewhere the plain C operator is used.
 */
static inline uint32_t logical_and(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline uint32_t logical_and(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    __asm__ volatile("and %0, %1" : "+r" (a) : "r" (b));
    return a;
#else
    /* Portable path so the header also builds on other targets. */
    return a & b;
#endif
}
-
-
/*
 * Dual signed 16x16 multiply with add:
 * returns high16(a) * high16(b) + low16(a) * low16(b).
 * The final addition wraps modulo 2^32.
 */
static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16t_add_16bx16b(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smuad %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMUAD instruction. The products
     * are added as unsigned values so the one overflowing case (all four
     * halves equal to 0x8000) wraps instead of being undefined. */
    const int32_t top = (int16_t)(a >> 16) * (int16_t)(b >> 16);
    const int32_t bottom = (int16_t)(a & 0xFFFF) * (int16_t)(b & 0xFFFF);
    return (int32_t)((uint32_t)top + (uint32_t)bottom);
#endif
}
-
-
/*
 * Dual signed 16x16 multiply with add, with the halves of `b` exchanged:
 * returns high16(a) * low16(b) + low16(a) * high16(b).
 * The final addition wraps modulo 2^32.
 */
static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smuadx %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMUADX instruction. The products
     * are added as unsigned values so overflow wraps instead of being
     * undefined. */
    const int32_t top_by_bottom = (int16_t)(a >> 16) * (int16_t)(b & 0xFFFF);
    const int32_t bottom_by_top = (int16_t)(a & 0xFFFF) * (int16_t)(b >> 16);
    return (int32_t)((uint32_t)top_by_bottom + (uint32_t)bottom_by_top);
#endif
}
-
-
/*
 * Signed 16x16 multiply: low16(a) * low16(b), returned as a 32-bit result.
 */
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smulbb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMULBB instruction. */
    const int16_t lhs = (int16_t)(a & 0xFFFF);
    const int16_t rhs = (int16_t)(b & 0xFFFF);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Signed 16x16 multiply: low16(a) * high16(b), returned as a 32-bit result.
 */
static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16t(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smulbt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMULBT instruction. */
    const int16_t lhs = (int16_t)(a & 0xFFFF);
    const int16_t rhs = (int16_t)(b >> 16);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Signed 16x16 multiply: high16(a) * low16(b), returned as a 32-bit result.
 */
static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16b(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smultb %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMULTB instruction. */
    const int16_t lhs = (int16_t)(a >> 16);
    const int16_t rhs = (int16_t)(b & 0xFFFF);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Signed 16x16 multiply: high16(a) * high16(b), returned as a 32-bit result.
 */
static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16tx16t(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("smultt %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the SMULTT instruction. */
    const int16_t lhs = (int16_t)(a >> 16);
    const int16_t rhs = (int16_t)(b >> 16);
    return (int32_t)lhs * (int32_t)rhs;
#endif
}
-
-
/*
 * Saturating signed subtraction: interpret `a` and `b` as signed 32-bit
 * values and return a - b clamped to [INT32_MIN, INT32_MAX].
 */
static inline int32_t substract_32_saturate(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t substract_32_saturate(uint32_t a, uint32_t b)
{
#if defined(KINETISK) || defined(__ARM_ARCH_7EM__) || defined(__ARM_FEATURE_DSP)
    int32_t out;
    __asm__ volatile("qsub %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
    return out;
#else
    /* Portable path for targets without the QSUB instruction. The
     * difference is formed in 64 bits so it cannot overflow before the
     * clamp is applied. */
    int64_t diff = (int64_t)(int32_t)a - (int64_t)(int32_t)b;

    if (diff > INT32_MAX) {
        diff = INT32_MAX;
    } else if (diff < INT32_MIN) {
        diff = INT32_MIN;
    }
    return (int32_t)diff;
#endif
}
-
-
-
- #endif
|