/**************************************************************************//**
 * @file     core_cm4_simd.h
 * @brief    CMSIS Cortex-M4 SIMD Header File
 * @version  V3.01
 * @date     06. March 2012
 *
 * @note
 * Copyright (C) 2010-2012 ARM Limited. All rights reserved.
 *
 * @par
 * ARM Limited (ARM) is supplying this software for use with Cortex-M
 * processor based microcontrollers.  This file can be freely distributed
 * within development tools that are supporting such ARM based processors.
 *
 * @par
 * THIS SOFTWARE IS PROVIDED "AS IS".  NO WARRANTIES, WHETHER EXPRESS, IMPLIED
 * OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE APPLY TO THIS SOFTWARE.
 * ARM SHALL NOT, IN ANY CIRCUMSTANCES, BE LIABLE FOR SPECIAL, INCIDENTAL, OR
 * CONSEQUENTIAL DAMAGES, FOR ANY REASON WHATSOEVER.
 *
 ******************************************************************************/
#ifdef __cplusplus
 extern "C" {
#endif

#ifndef __CORE_CM4_SIMD_H
#define __CORE_CM4_SIMD_H

/*******************************************************************************
 *                Hardware Abstraction Layer
 ******************************************************************************/

/* ###################  Compiler specific Intrinsics  ########################### */
/** \defgroup CMSIS_SIMD_intrinsics   CMSIS SIMD Intrinsics
  Access to dedicated SIMD instructions
  @{
*/

#if defined ( __GNUC__ ) /*------------------ GNU Compiler ---------------------*/
/* GNU gcc specific functions */

/*------ CM4 SIMD Intrinsics -----------------------------------------------------*/
/* Parallel add/subtract of four packed 8-bit lanes: plain, Q (saturating) and SH/UH (halving) variants, signed and unsigned */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("sadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("shadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uqadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uhadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("ssub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("shsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("usub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uqsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uhsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
/* Parallel add/subtract of two packed 16-bit lanes: plain, Q (saturating) and SH/UH (halving) variants, signed and unsigned */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("sadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uqadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uhadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("ssub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("usub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uqsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uhsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
/* Exchange-based add/subtract (ASX) and subtract/add (SAX) on packed 16-bit lanes, with saturating and halving variants */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("sasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uqasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHASX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uhasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("ssax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("usax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uqsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uhsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
/* Unsigned sum of absolute byte differences (with optional accumulate) and parallel 16-bit saturation */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAD8(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("usad8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;
  __ASM volatile ("usada8 %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

#define __SSAT16(ARG1,ARG2) \
({ \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \
  __RES; \
 })

#define __USAT16(ARG1,ARG2) \
({ \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM ("usat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \
  __RES; \
 })
/* Dual byte-to-halfword zero/sign extension, with optional add */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTB16(uint32_t op1)
{
  uint32_t result;
  __ASM volatile ("uxtb16 %0, %1" : "=r" (result) : "r" (op1));
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("uxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTB16(uint32_t op1)
{
  uint32_t result;
  __ASM volatile ("sxtb16 %0, %1" : "=r" (result) : "r" (op1));
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("sxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}
/* Dual signed 16-bit multiply with add, optionally exchanged and/or accumulated (32-bit or 64-bit accumulator) */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUAD  (uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;
  __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;
  __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

#define __SMLALD(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })

#define __SMLALDX(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((uint64_t)(ARG3) >> 32), __ARG3_L = (uint32_t)((uint64_t)(ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
/* Dual signed 16-bit multiply with subtract, optionally exchanged and/or accumulated (32-bit or 64-bit accumulator) */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSD  (uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSD (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;
  __ASM volatile ("smlsd %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t result;
  __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) );
  return(result);
}

#define __SMLSLD(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((ARG3) >> 32), __ARG3_L = (uint32_t)((ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })

#define __SMLSLDX(ARG1,ARG2,ARG3) \
({ \
  uint32_t __ARG1 = (ARG1), __ARG2 = (ARG2), __ARG3_H = (uint32_t)((ARG3) >> 32), __ARG3_L = (uint32_t)((ARG3) & 0xFFFFFFFFUL); \
  __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (__ARG3_L), "=r" (__ARG3_H) : "r" (__ARG1), "r" (__ARG2), "0" (__ARG3_L), "1" (__ARG3_H) ); \
  (uint64_t)(((uint64_t)__ARG3_H << 32) | __ARG3_L); \
 })
/* GE-flag based byte select, saturating 32-bit add/subtract, and halfword packing */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SEL  (uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("sel %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB(uint32_t op1, uint32_t op2)
{
  uint32_t result;
  __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) );
  return(result);
}

#define __PKHBT(ARG1,ARG2,ARG3) \
({ \
  uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
  __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \
  __RES; \
 })

#define __PKHTB(ARG1,ARG2,ARG3) \
({ \
  uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \
  if (ARG3 == 0) \
    __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \
  else \
    __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \
  __RES; \
 })
/*-- End CM4 SIMD Intrinsics -----------------------------------------------------*/

#endif /* __GNUC__ */

/*@} end of group CMSIS_SIMD_intrinsics */

#endif /* __CORE_CM4_SIMD_H */

#ifdef __cplusplus
}
#endif
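
The header above only defines the intrinsics; the sketch below is an illustrative usage example added to this page and is not part of the CMSIS file. The helper name dot_q15, the even-length assumption, and the placeholder device header (which on a Cortex-M4 GCC build would pull in core_cm4.h and hence core_cm4_simd.h) are assumptions made purely for the example.

/* Illustrative usage sketch -- not part of core_cm4_simd.h */
#include <stdint.h>
#include <string.h>
#include "device.h"   /* placeholder: vendor device header that includes core_cm4.h */

/* Dot product of two Q15 (int16_t) vectors, processing two lanes per step with __SMLAD.
   'len' is assumed to be even to keep the sketch short. */
static int32_t dot_q15(const int16_t *a, const int16_t *b, uint32_t len)
{
  uint32_t acc = 0U;                      /* SMLAD accumulates in a 32-bit register */
  for (uint32_t i = 0U; i < len; i += 2U)
  {
    uint32_t va, vb;
    memcpy(&va, &a[i], sizeof va);        /* pack two int16_t lanes into one 32-bit word */
    memcpy(&vb, &b[i], sizeof vb);
    acc = __SMLAD(va, vb, acc);           /* acc += a[i]*b[i] + a[i+1]*b[i+1] */
  }
  return (int32_t)acc;
}

memcpy is used here only to pack the lanes without unaligned accesses or type punning; in practice the data would often already live in packed 32-bit buffers.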