You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

242 lines
4.7KB

  1. /* Teensyduino Audio Memcpy
  2. * Copyright (c) 2016, 2017, 2018, 2019 Frank Bösing
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining
  5. * a copy of this software and associated documentation files (the
  6. * "Software"), to deal in the Software without restriction, including
  7. * without limitation the rights to use, copy, modify, merge, publish,
  8. * distribute, sublicense, and/or sell copies of the Software, and to
  9. * permit persons to whom the Software is furnished to do so, subject to
  10. * the following conditions:
  11. *
  12. * 1. The above copyright notice and this permission notice shall be
  13. * included in all copies or substantial portions of the Software.
  14. *
  15. * 2. If the Software is incorporated into a build system that allows
  16. * selection among a list of target devices, then similar target
  17. * devices manufactured by PJRC.COM must be included in the list of
  18. * target devices and selectable in the same manner.
  19. *
  20. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  24. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  26. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27. * SOFTWARE.
  28. */
  29. #if defined (__ARM_ARCH_7EM__)
  30. #include <AudioStream.h>
  31. .cpu cortex-m4
  32. .syntax unified
  33. .thumb
  34. .text
  /*
   * void memcpy_tointerleaveLR(short *dst, short *srcL, short *srcR);
   *
   * Merges two 16-bit sample streams into one interleaved stream. Every
   * output word holds the srcL sample in its low halfword and the matching
   * srcR sample in its high halfword.
   *
   * In:       r0 = dst, r1 = srcL, r2 = srcR
   * Clobbers: r0-r3 and the flags; the looping variant also overwrites r12.
   *           Every other register touched is pushed on entry and popped
   *           again before returning.
   *
   * AUDIO_BLOCK_SAMPLES > 8:  writes AUDIO_BLOCK_SAMPLES*2 bytes to dst, in
   *     passes of 32 bytes (16 bytes consumed from each source per pass).
   *     The loop exits on an exact pointer match, so AUDIO_BLOCK_SAMPLES*2
   *     has to be a multiple of 32.
   * AUDIO_BLOCK_SAMPLES == 8: one straight-line pass writing 16 bytes.
   * Smaller values:           nothing is copied.
   */
          .global memcpy_tointerleaveLR
          .thumb_func
  memcpy_tointerleaveLR:
  #if AUDIO_BLOCK_SAMPLES > 8
          push    {r4-r11, lr}
          add     lr, r0, #(AUDIO_BLOCK_SAMPLES*2)    // lr = dst end address (lr was saved above)
          .p2align 2
  .interleave_lr_pass:
          // Fetch four words (eight samples) from each channel.
          ldmia   r1!, {r5, r7, r9, r11}              // left  -> odd registers
          ldmia   r2!, {r6, r8, r10, ip}              // right -> even registers and r12
          // Each pkhbt/pkhtb pair turns one left word and one right word into
          // two output words. A pair only overwrites inputs that the
          // preceding pairs have already consumed.
          pkhbt   r3, r5, r6, lsl #16                 // r3  = low(r5)  | low(r6)  << 16
          pkhtb   r4, r6, r5, asr #16                 // r4  = high(r5) | high(r6) << 16
          pkhbt   r5, r7, r8, lsl #16
          pkhtb   r6, r8, r7, asr #16
          pkhbt   r7, r9, r10, lsl #16
          pkhtb   r8, r10, r9, asr #16
          pkhbt   r9, r11, ip, lsl #16
          pkhtb   r10, ip, r11, asr #16
          // Emit the eight interleaved words.
          stmia   r0!, {r3-r10}
          cmp     lr, r0
          bne     .interleave_lr_pass
          pop     {r4-r11, lr}
  #elif AUDIO_BLOCK_SAMPLES == 8
          push    {r4-r8, lr}
          ldmia   r1!, {r5, r7}                       // two left words
          ldmia   r2!, {r6, r8}                       // two right words
          pkhbt   r3, r5, r6, lsl #16
          pkhtb   r4, r6, r5, asr #16
          pkhbt   r5, r7, r8, lsl #16
          pkhtb   r6, r8, r7, asr #16
          stmia   r0!, {r3-r6}
          pop     {r4-r8, lr}
  #endif
          bx      lr
  /*
   * void memcpy_tointerleaveL(short *dst, short *srcL);
   *
   * Expands one 16-bit sample stream into interleaved form with the other
   * channel zeroed. Every output word holds a srcL sample in its low
   * halfword and zero in its high halfword.
   *
   * In:       r0 = dst, r1 = srcL
   * Clobbers: r0-r3 and the flags; the looping variant also overwrites r12.
   *           r2 is always set to zero. Every other register touched is
   *           pushed on entry and popped again before returning.
   *
   * AUDIO_BLOCK_SAMPLES > 8:  writes AUDIO_BLOCK_SAMPLES*2 bytes to dst, in
   *     passes of 32 bytes (16 source bytes consumed per pass). The loop
   *     exits on an exact pointer match, so AUDIO_BLOCK_SAMPLES*2 has to be
   *     a multiple of 32.
   * AUDIO_BLOCK_SAMPLES == 8: one straight-line pass writing 16 bytes.
   * Smaller values:           nothing is copied.
   */
          .global memcpy_tointerleaveL
          .thumb_func
  memcpy_tointerleaveL:
          mov     r2, #0                              // r2 = constant zero for the silent channel
  #if AUDIO_BLOCK_SAMPLES > 8
          push    {r4-r11}
          add     ip, r0, #(AUDIO_BLOCK_SAMPLES*2)    // ip (r12) = dst end address
          .p2align 2
  .interleave_l_pass:
          // Fetch four words (eight samples).
          ldmia   r1!, {r5, r7, r9, r11}
          // Split every source word into two output words. The pkhtb of a
          // pair reads its source register before the next pkhbt reuses it.
          pkhbt   r3, r5, r2                          // r3 = low(r5),  upper half zero
          pkhtb   r4, r2, r5, asr #16                 // r4 = high(r5), upper half zero
          pkhbt   r5, r7, r2
          pkhtb   r6, r2, r7, asr #16
          pkhbt   r7, r9, r2
          pkhtb   r8, r2, r9, asr #16
          pkhbt   r9, r11, r2
          pkhtb   r10, r2, r11, asr #16
          // Emit the eight expanded words.
          stmia   r0!, {r3-r10}
          cmp     ip, r0
          bne     .interleave_l_pass
          pop     {r4-r11}
  #elif AUDIO_BLOCK_SAMPLES == 8
          push    {r4-r7}
          ldmia   r1!, {r5, r7}                       // two words (four samples)
          pkhbt   r3, r5, r2
          pkhtb   r4, r2, r5, asr #16
          pkhbt   r5, r7, r2
          pkhtb   r6, r2, r7, asr #16
          stmia   r0!, {r3-r6}
          pop     {r4-r7}
  #endif
          bx      lr
  /*
   * void memcpy_tointerleaveR(short *dst, short *srcR);
   *
   * Expands one 16-bit sample stream into interleaved form with the other
   * channel zeroed. Every output word holds zero in its low halfword and a
   * srcR sample in its high halfword.
   *
   * In:       r0 = dst, r1 = srcR
   * Clobbers: r0-r3 and the flags; the looping variant also overwrites r12.
   *           r2 is always set to zero. Every other register touched is
   *           pushed on entry and popped again before returning.
   *
   * AUDIO_BLOCK_SAMPLES > 8:  writes AUDIO_BLOCK_SAMPLES*2 bytes to dst, in
   *     passes of 32 bytes (16 source bytes consumed per pass). The loop
   *     exits on an exact pointer match, so AUDIO_BLOCK_SAMPLES*2 has to be
   *     a multiple of 32.
   * AUDIO_BLOCK_SAMPLES == 8: one straight-line pass writing 16 bytes.
   * Smaller values:           nothing is copied.
   */
          .global memcpy_tointerleaveR
          .thumb_func
  memcpy_tointerleaveR:
          mov     r2, #0                              // r2 = constant zero for the silent channel
  #if AUDIO_BLOCK_SAMPLES > 8
          push    {r4-r11}
          add     ip, r0, #(AUDIO_BLOCK_SAMPLES*2)    // ip (r12) = dst end address
          .p2align 2
  .interleave_r_pass:
          // Fetch four words (eight samples).
          ldmia   r1!, {r5, r7, r9, r11}
          // Split every source word into two output words. The pkhtb of a
          // pair reads its source register before the next pkhbt reuses it.
          pkhbt   r3, r2, r5, lsl #16                 // r3 = low(r5) << 16, lower half zero
          pkhtb   r4, r5, r2                          // r4 = high(r5) kept in place, lower half zero
          pkhbt   r5, r2, r7, lsl #16
          pkhtb   r6, r7, r2
          pkhbt   r7, r2, r9, lsl #16
          pkhtb   r8, r9, r2
          pkhbt   r9, r2, r11, lsl #16
          pkhtb   r10, r11, r2
          // Emit the eight expanded words.
          stmia   r0!, {r3-r10}
          cmp     ip, r0
          bne     .interleave_r_pass
          pop     {r4-r11}
  #elif AUDIO_BLOCK_SAMPLES == 8
          push    {r4-r7}
          ldmia   r1!, {r5, r7}                       // two words (four samples)
          pkhbt   r3, r2, r5, lsl #16
          pkhtb   r4, r5, r2
          pkhbt   r5, r2, r7, lsl #16
          pkhtb   r6, r7, r2
          stmia   r0!, {r3-r6}
          pop     {r4-r7}
  #endif
          bx      lr
  /*
   * void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1,
   *                              const int16_t *src2, const int16_t *src3,
   *                              const int16_t *src4);
   *
   * Interleaves four 16-bit sample streams. Each pass reads one word (two
   * samples) from every source and stores four words, in this order:
   *     word 0: low = src1 sample n,   high = src3 sample n
   *     word 1: low = src2 sample n,   high = src4 sample n
   *     word 2: low = src1 sample n+1, high = src3 sample n+1
   *     word 3: low = src2 sample n+1, high = src4 sample n+1
   *
   * In:       r0 = dst, r1 = src1, r2 = src2, r3 = src3,
   *           src4 = fifth argument, fetched from the stack into r4
   * Clobbers: r0-r3 and the flags; r4-r11 are saved and restored.
   *
   * Writes AUDIO_BLOCK_SAMPLES*4 bytes to dst in passes of 16 bytes. The
   * loop exits on an exact pointer match, so AUDIO_BLOCK_SAMPLES*4 has to be
   * a multiple of 16.
   */
          .global memcpy_tointerleaveQuad
          .thumb_func
  memcpy_tointerleaveQuad:
          push    {r4-r11}
          ldr     r4, [sp, #(8*4)]                    // src4 sits just above the 8 registers pushed above
          add     r11, r0, #(AUDIO_BLOCK_SAMPLES*4)   // r11 = dst end address
          .p2align 2
  .interleave_quad_pass:
          // Sources 1 and 3 share output words 0 and 2.
          ldr     r5, [r1], #4
          ldr     r6, [r3], #4
          pkhbt   r7, r5, r6, lsl #16                 // r7  = low(src1)  | low(src3)  << 16
          pkhtb   r9, r6, r5, asr #16                 // r9  = high(src1) | high(src3) << 16
          // Sources 2 and 4 share output words 1 and 3.
          ldr     r5, [r2], #4
          ldr     r6, [r4], #4
          pkhbt   r8, r5, r6, lsl #16                 // r8  = low(src2)  | low(src4)  << 16
          pkhtb   r10, r6, r5, asr #16                // r10 = high(src2) | high(src4) << 16
          stmia   r0!, {r7-r10}
          cmp     r11, r0
          bne     .interleave_quad_pass
          pop     {r4-r11}
          bx      lr
  178. .END
  179. #endif