You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

211 lines
4.2KB

  1. /* Teensyduino Audio Memcpy
  2. * Copyright (c) 2016 Frank Bösing
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining
  5. * a copy of this software and associated documentation files (the
  6. * "Software"), to deal in the Software without restriction, including
  7. * without limitation the rights to use, copy, modify, merge, publish,
  8. * distribute, sublicense, and/or sell copies of the Software, and to
  9. * permit persons to whom the Software is furnished to do so, subject to
  10. * the following conditions:
  11. *
  12. * 1. The above copyright notice and this permission notice shall be
  13. * included in all copies or substantial portions of the Software.
  14. *
  15. * 2. If the Software is incorporated into a build system that allows
  16. * selection among a list of target devices, then similar target
  17. * devices manufactured by PJRC.COM must be included in the list of
  18. * target devices and selectable in the same manner.
  19. *
  20. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  23. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  24. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  26. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27. * SOFTWARE.
  28. */
  /* Build this file only for the Kinetis Cortex-M4 parts listed below. */
  #if defined(__MK20DX128__) || defined(__MK20DX256__) || defined(__MK64FX512__) || defined(__MK66FX1M0__)

  /* Assembler setup: unified ARM/Thumb syntax, Thumb encoding, Cortex-M4. */
  .syntax unified
  .cpu    cortex-m4
  .thumb

  /* Code goes into .text; start on a 4-byte boundary. */
  .text
  .p2align 2
  /*
   * void memcpy_tointerleaveLR(short *dst, short *srcL, short *srcR);
   *
   * Interleaves two streams of 16-bit samples into dst as
   * L0,R0,L1,R1,...  Exactly LR_DST_BYTES bytes are written to dst and
   * half that many bytes are read from each source. The size is fixed
   * at assembly time; there is no length argument.
   *
   * All three pointers are accessed with LDM/STM and must therefore be
   * word aligned.
   *
   * In:    r0 = dst, r1 = srcL, r2 = srcR
   * Out:   nothing returned; r0-r2 end up advanced past the data
   * Clobb: r0-r3, r12, flags (r4-r11 are saved and restored)
   */
  .equ    LR_DST_BYTES, 256               @ TODO: 256 = AUDIO_BLOCK_SAMPLES*2
  .global memcpy_tointerleaveLR
  .type   memcpy_tointerleaveLR, %function
  .thumb_func
  memcpy_tointerleaveLR:
        push    {r4-r11, lr}            @ lr is reused below as the end pointer
        add     lr, r0, #LR_DST_BYTES   @ lr = dst + LR_DST_BYTES (loop limit)
        .p2align 2
  .LloopLR:
        @ Body is emitted twice per loop pass: 2 x 32 = 64 output bytes.
        .rept   2
        @ Load 4 words (8 samples) from each source.
        ldmia   r1!, {r5, r7, r9, r11}  @ left:  L0L1, L2L3, L4L5, L6L7
        ldmia   r2!, {r6, r8, r10, r12} @ right: R0R1, R2R3, R4R5, R6R7
        @ pkhbt: low half of the left word + low half of the right word.
        @ pkhtb: high half of the left word + high half of the right word.
        @ Each input register is overwritten only after its last read.
        pkhbt   r3, r5, r6, lsl #16     @ r3  = L0:R0
        pkhtb   r4, r6, r5, asr #16     @ r4  = L1:R1
        pkhbt   r5, r7, r8, lsl #16     @ r5  = L2:R2
        pkhtb   r6, r8, r7, asr #16     @ r6  = L3:R3
        pkhbt   r7, r9, r10, lsl #16    @ r7  = L4:R4
        pkhtb   r8, r10, r9, asr #16    @ r8  = L5:R5
        pkhbt   r9, r11, r12, lsl #16   @ r9  = L6:R6
        pkhtb   r10, r12, r11, asr #16  @ r10 = L7:R7
        @ Store the 8 interleaved words.
        stmia   r0!, {r3-r10}
        .endr                           @ original author estimate: 27 cycles per 32 bytes (not re-measured)
        cmp     lr, r0
        bne     .LloopLR
        pop     {r4-r11, pc}            @ restore and return in one instruction
  .size   memcpy_tointerleaveLR, . - memcpy_tointerleaveLR
  /*
   * void memcpy_tointerleaveL(short *dst, short *srcL);
   *
   * Expands one stream of 16-bit samples into an interleaved pair where
   * the second slot of every pair is zero: dst = L0,0,L1,0,...
   * Exactly L_DST_BYTES bytes are written to dst and half that many
   * bytes are read from srcL. The size is fixed at assembly time.
   *
   * Both pointers are accessed with LDM/STM and must therefore be word
   * aligned.
   *
   * In:    r0 = dst, r1 = srcL
   * Out:   nothing returned; r0 and r1 end up advanced past the data
   * Clobb: r0-r3, r12, flags (r4-r11 are saved and restored)
   */
  .equ    L_DST_BYTES, 256                @ TODO: 256 = AUDIO_BLOCK_SAMPLES*2
  .global memcpy_tointerleaveL
  .type   memcpy_tointerleaveL, %function
  .thumb_func
  memcpy_tointerleaveL:
        push    {r4-r11}
        mov     r2, #0                  @ r2 = 0, supplies the empty half of each pair
        add     r12, r0, #L_DST_BYTES   @ r12 = dst + L_DST_BYTES (loop limit)
        .p2align 2
  .LloopL:
        @ Body is emitted twice per loop pass: 2 x 32 = 64 output bytes.
        .rept   2
        @ Load 4 words (8 samples).
        ldmia   r1!, {r5, r7, r9, r11}  @ L0L1, L2L3, L4L5, L6L7
        @ pkhbt keeps the low sample and takes a zero top half from r2.
        @ pkhtb moves the high sample down and takes a zero top half from r2.
        @ Each input register is overwritten only after its last read.
        pkhbt   r3, r5, r2              @ r3  = L0:0
        pkhtb   r4, r2, r5, asr #16     @ r4  = L1:0
        pkhbt   r5, r7, r2              @ r5  = L2:0
        pkhtb   r6, r2, r7, asr #16     @ r6  = L3:0
        pkhbt   r7, r9, r2              @ r7  = L4:0
        pkhtb   r8, r2, r9, asr #16     @ r8  = L5:0
        pkhbt   r9, r11, r2             @ r9  = L6:0
        pkhtb   r10, r2, r11, asr #16   @ r10 = L7:0
        @ Store the 8 expanded words.
        stmia   r0!, {r3-r10}
        .endr
        cmp     r12, r0
        bne     .LloopL
        pop     {r4-r11}
        bx      lr
  .size   memcpy_tointerleaveL, . - memcpy_tointerleaveL
  /*
   * void memcpy_tointerleaveR(short *dst, short *srcR);
   *
   * Expands one stream of 16-bit samples into an interleaved pair where
   * the first slot of every pair is zero: dst = 0,R0,0,R1,...
   * Exactly R_DST_BYTES bytes are written to dst and half that many
   * bytes are read from srcR. The size is fixed at assembly time.
   *
   * Both pointers are accessed with LDM/STM and must therefore be word
   * aligned.
   *
   * In:    r0 = dst, r1 = srcR
   * Out:   nothing returned; r0 and r1 end up advanced past the data
   * Clobb: r0-r3, r12, flags (r4-r11 are saved and restored)
   */
  .equ    R_DST_BYTES, 256                @ TODO: 256 = AUDIO_BLOCK_SAMPLES*2
  .global memcpy_tointerleaveR
  .type   memcpy_tointerleaveR, %function
  .thumb_func
  memcpy_tointerleaveR:
        push    {r4-r11}
        mov     r2, #0                  @ r2 = 0, supplies the empty half of each pair
        add     r12, r0, #R_DST_BYTES   @ r12 = dst + R_DST_BYTES (loop limit)
        .p2align 2
  .LloopR:
        @ Body is emitted twice per loop pass: 2 x 32 = 64 output bytes.
        .rept   2
        @ Load 4 words (8 samples).
        ldmia   r1!, {r5, r7, r9, r11}  @ R0R1, R2R3, R4R5, R6R7
        @ pkhbt takes a zero low half from r2 and moves the low sample up.
        @ pkhtb keeps the high sample in place and takes a zero low half from r2.
        @ Each input register is overwritten only after its last read.
        pkhbt   r3, r2, r5, lsl #16     @ r3  = 0:R0
        pkhtb   r4, r5, r2              @ r4  = 0:R1
        pkhbt   r5, r2, r7, lsl #16     @ r5  = 0:R2
        pkhtb   r6, r7, r2              @ r6  = 0:R3
        pkhbt   r7, r2, r9, lsl #16     @ r7  = 0:R4
        pkhtb   r8, r9, r2              @ r8  = 0:R5
        pkhbt   r9, r2, r11, lsl #16    @ r9  = 0:R6
        pkhtb   r10, r11, r2            @ r10 = 0:R7
        @ Store the 8 expanded words.
        stmia   r0!, {r3-r10}
        .endr
        cmp     r12, r0
        bne     .LloopR
        pop     {r4-r11}
        bx      lr
  .size   memcpy_tointerleaveR, . - memcpy_tointerleaveR
  /*
   * void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1,
   *                              const int16_t *src2, const int16_t *src3,
   *                              const int16_t *src4);
   *
   * Interleaves four streams of 16-bit samples into dst. For every input
   * index i the output order is src1[i], src3[i], src2[i], src4[i]
   * (note: 1,3,2,4 and not 1,2,3,4).
   * NOTE(review): this order presumably matches what the consumer of dst
   * expects; confirm against the caller before changing it.
   *
   * Exactly QUAD_DST_BYTES bytes are written to dst and a quarter of
   * that is read from each source. The size is fixed at assembly time.
   * dst is written with STM and must be word aligned; the sources are
   * read one word at a time with LDR.
   *
   * In:    r0 = dst, r1 = src1, r2 = src2, r3 = src3,
   *        [sp] at entry = src4 (fifth argument is passed on the stack)
   * Out:   nothing returned; r0-r3 end up advanced past the data
   * Clobb: r0-r3, flags (r4-r11 are saved and restored)
   */
  .equ    QUAD_DST_BYTES, 512             @ TODO: 512 = AUDIO_BLOCK_SAMPLES*4
  .equ    QUAD_SAVED_BYTES, 8*4           @ bytes pushed below: r4-r11 = 8 registers
  .global memcpy_tointerleaveQuad
  .type   memcpy_tointerleaveQuad, %function
  .thumb_func
  memcpy_tointerleaveQuad:
        push    {r4-r11}
        ldr     r4, [sp, #QUAD_SAVED_BYTES]     @ r4 = src4, found just above the saved registers
        add     r11, r0, #QUAD_DST_BYTES        @ r11 = dst + QUAD_DST_BYTES (loop limit)
        .p2align 2
  .LloopQuad:
        @ Body is emitted twice per loop pass: 2 x 16 = 32 output bytes.
        .rept   2
        ldr     r5, [r1], #4            @ r5 = two samples of src1, post-increment
        ldr     r6, [r3], #4            @ r6 = two samples of src3
        pkhbt   r7, r5, r6, lsl #16     @ r7  = src1[i]   : src3[i]
        pkhtb   r9, r6, r5, asr #16     @ r9  = src1[i+1] : src3[i+1]
        ldr     r5, [r2], #4            @ r5 = two samples of src2
        ldr     r6, [r4], #4            @ r6 = two samples of src4
        pkhbt   r8, r5, r6, lsl #16     @ r8  = src2[i]   : src4[i]
        pkhtb   r10, r6, r5, asr #16    @ r10 = src2[i+1] : src4[i+1]
        stmia   r0!, {r7-r10}           @ stored in register order r7, r8, r9, r10
        .endr
        cmp     r11, r0
        bne     .LloopQuad
        pop     {r4-r11}
        bx      lr
  .size   memcpy_tointerleaveQuad, . - memcpy_tointerleaveQuad
  /* End of assembly input. */
  .end
  #endif /* __MK20DX128__ || __MK20DX256__ || __MK64FX512__ || __MK66FX1M0__ */