|
- /* Teensyduino Audio Memcpy
- * Copyright (c) 2016 Frank Bösing
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * 1. The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * 2. If the Software is incorporated into a build system that allows
- * selection among a list of target devices, then similar target
- * devices manufactured by PJRC.COM must be included in the list of
- * target devices and selectable in the same manner.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
- #if defined(__MK20DX128__) || defined(__MK20DX256__) || defined(__MK64FX512__) || defined(__MK66FX1M0__)
-
- .cpu cortex-m4
- .syntax unified
- .thumb
- .text
- .align 2
-
- /*
- * void memcpy_tointerleaveLR(short *dst, short *srcL, short *srcR);
- *
- * Interleaves two streams of 16-bit samples. Each 32-bit word written to
- * dst holds one srcL sample in its bottom halfword and the srcR sample
- * with the same index in its top halfword.
- *
- * In:      r0 = dst, r1 = srcL, r2 = srcR
- * Size:    fixed. 256 bytes are written to dst and 128 bytes are read
- *          from each source.
- *          TODO (kept from the original): 256 = AUDIO_BLOCK_SAMPLES*2
- * Saved:   r4-r11 and r14 are pushed on entry and restored before return.
- * Scratch: r3 and r12; r0-r2 are left pointing past the processed data.
- * Timing:  estimate noted by the original author is 5+5+8+9 = 27 cycles
- *          for every 32 interleaved bytes.
- */
-     .equ    INTERLEAVE_LR_DST_BYTES, 256
-
-     .global memcpy_tointerleaveLR
-     .thumb_func
- memcpy_tointerleaveLR:
-     push    {r4-r11, r14}
-     add     r14, r0, #INTERLEAVE_LR_DST_BYTES    // r14 = dst end, used as loop bound
-     .p2align 2
- .LinterleaveLR_loop:
-
-     .rept   2                                    // body emitted twice: 64 dst bytes per pass
-
-     ldmia   r1!, {r5, r7, r9, r11}               // 4 words of srcL (8 samples)
-     ldmia   r2!, {r6, r8, r10, r12}              // 4 words of srcR (8 samples)
-
-     // Each input word pair yields two output words. The second pack of a
-     // pair must run before its inputs are reused as outputs further down.
-     pkhbt   r3, r5, r6, LSL #16                  // r3  = low(r5)   | low(r6)   << 16
-     pkhtb   r4, r6, r5, ASR #16                  // r4  = high(r5)  | high(r6)  << 16
-
-     pkhbt   r5, r7, r8, LSL #16                  // r5  = low(r7)   | low(r8)   << 16
-     pkhtb   r6, r8, r7, ASR #16                  // r6  = high(r7)  | high(r8)  << 16
-
-     pkhbt   r7, r9, r10, LSL #16                 // r7  = low(r9)   | low(r10)  << 16
-     pkhtb   r8, r10, r9, ASR #16                 // r8  = high(r9)  | high(r10) << 16
-
-     pkhbt   r9, r11, r12, LSL #16                // r9  = low(r11)  | low(r12)  << 16
-     pkhtb   r10, r12, r11, ASR #16               // r10 = high(r11) | high(r12) << 16
-
-     stmia   r0!, {r3-r10}                        // 8 interleaved words out
-
-     .endr
-
-     cmp     r14, r0                              // loop until dst reaches the end pointer
-     bne     .LinterleaveLR_loop
-
-     pop     {r4-r11, r14}
-     bx      lr
-
-
- /*
- * void memcpy_tointerleaveL(short *dst, short *srcL);
- *
- * Expands one stream of 16-bit samples into the interleaved layout with
- * the other channel silent. Each 32-bit word written to dst holds one
- * srcL sample in its bottom halfword and zero in its top halfword.
- *
- * In:      r0 = dst, r1 = srcL
- * Size:    fixed. 256 bytes are written to dst and 128 bytes are read
- *          from srcL.
- *          TODO (kept from the original): 256 = AUDIO_BLOCK_SAMPLES*2
- * Saved:   r4-r11 are pushed on entry and restored before return.
- * Scratch: r2 (constant zero), r3 and r12 (dst end pointer); r0 and r1
- *          are left pointing past the processed data.
- */
-     .equ    INTERLEAVE_L_DST_BYTES, 256
-
-     .global memcpy_tointerleaveL
-     .thumb_func
- memcpy_tointerleaveL:
-     push    {r4-r11}
-     mov     r2, #0                               // r2 supplies the zero halfwords
-     add     r12, r0, #INTERLEAVE_L_DST_BYTES     // r12 = dst end, used as loop bound
-     .p2align 2
- .LinterleaveL_loop:
-
-     .rept   2                                    // body emitted twice: 64 dst bytes per pass
-
-     ldmia   r1!, {r5, r7, r9, r11}               // 4 words of srcL (8 samples), 1+4 cycles
-
-     // Each input word is split into two output words; an input register
-     // is only overwritten after both of its halves have been extracted.
-     pkhbt   r3, r5, r2                           // r3  = low(r5)   | 0 << 16
-     pkhtb   r4, r2, r5, ASR #16                  // r4  = high(r5)  | 0 << 16
-
-     pkhbt   r5, r7, r2                           // r5  = low(r7)   | 0 << 16
-     pkhtb   r6, r2, r7, ASR #16                  // r6  = high(r7)  | 0 << 16
-
-     pkhbt   r7, r9, r2                           // r7  = low(r9)   | 0 << 16
-     pkhtb   r8, r2, r9, ASR #16                  // r8  = high(r9)  | 0 << 16
-
-     pkhbt   r9, r11, r2                          // r9  = low(r11)  | 0 << 16
-     pkhtb   r10, r2, r11, ASR #16                // r10 = high(r11) | 0 << 16
-
-     stmia   r0!, {r3-r10}                        // 8 words out, 1+8 cycles
-
-     .endr
-
-     cmp     r12, r0                              // loop until dst reaches the end pointer
-     bne     .LinterleaveL_loop
-
-     pop     {r4-r11}
-     bx      lr
-
- /*
- * void memcpy_tointerleaveR(short *dst, short *srcR);
- *
- * Expands one stream of 16-bit samples into the interleaved layout with
- * the other channel silent. Each 32-bit word written to dst holds zero
- * in its bottom halfword and one srcR sample in its top halfword.
- *
- * In:      r0 = dst, r1 = srcR
- * Size:    fixed. 256 bytes are written to dst and 128 bytes are read
- *          from srcR.
- *          TODO (kept from the original): 256 = AUDIO_BLOCK_SAMPLES*2
- * Saved:   r4-r11 are pushed on entry and restored before return.
- * Scratch: r2 (constant zero), r3 and r12 (dst end pointer); r0 and r1
- *          are left pointing past the processed data.
- */
-     .equ    INTERLEAVE_R_DST_BYTES, 256
-
-     .global memcpy_tointerleaveR
-     .thumb_func
- memcpy_tointerleaveR:
-     push    {r4-r11}
-     mov     r2, #0                               // r2 supplies the zero halfwords
-     add     r12, r0, #INTERLEAVE_R_DST_BYTES     // r12 = dst end, used as loop bound
-     .p2align 2
- .LinterleaveR_loop:
-
-     .rept   2                                    // body emitted twice: 64 dst bytes per pass
-
-     ldmia   r1!, {r5, r7, r9, r11}               // 4 words of srcR (8 samples)
-
-     // Each input word is split into two output words; an input register
-     // is only overwritten after both of its halves have been extracted.
-     pkhbt   r3, r2, r5, LSL #16                  // r3  = 0 | low(r5)   << 16
-     pkhtb   r4, r5, r2                           // r4  = 0 | high(r5)  << 16
-
-     pkhbt   r5, r2, r7, LSL #16                  // r5  = 0 | low(r7)   << 16
-     pkhtb   r6, r7, r2                           // r6  = 0 | high(r7)  << 16
-
-     pkhbt   r7, r2, r9, LSL #16                  // r7  = 0 | low(r9)   << 16
-     pkhtb   r8, r9, r2                           // r8  = 0 | high(r9)  << 16
-
-     pkhbt   r9, r2, r11, LSL #16                 // r9  = 0 | low(r11)  << 16
-     pkhtb   r10, r11, r2                         // r10 = 0 | high(r11) << 16
-
-     stmia   r0!, {r3-r10}                        // 8 words out
-
-     .endr
-
-     cmp     r12, r0                              // loop until dst reaches the end pointer
-     bne     .LinterleaveR_loop
-
-     pop     {r4-r11}
-     bx      lr
-
-
-
- /*
- * void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1,
- *                              const int16_t *src2, const int16_t *src3,
- *                              const int16_t *src4)
- *
- * Interleaves four streams of 16-bit samples. For every sample index two
- * 32-bit words are written to dst: the first holds the src1 sample in its
- * bottom halfword and the src3 sample in its top halfword, the second
- * holds the src2 sample (bottom) and the src4 sample (top).
- *
- * In:      r0 = dst, r1 = src1, r2 = src2, r3 = src3.
- *          src4 is the fifth argument; it arrives on the stack and is
- *          loaded into r4 after the register save.
- * Size:    fixed. 512 bytes are written to dst and 128 bytes are read
- *          from each source.
- *          TODO (kept from the original): 512 = AUDIO_BLOCK_SAMPLES*4
- * Saved:   r4-r11 are pushed on entry and restored before return.
- *          r0-r3 are left pointing past the processed data.
- */
-     .equ    INTERLEAVE_QUAD_DST_BYTES, 512
-
-     .global memcpy_tointerleaveQuad
-     .thumb_func
- memcpy_tointerleaveQuad:
-     push    {r4-r11}
-     ldr     r4, [sp, #32]                        // src4: sits above the 8 saved words (8*4 bytes)
-     add     r11, r0, #INTERLEAVE_QUAD_DST_BYTES  // r11 = dst end, used as loop bound
-     .p2align 2
- .LinterleaveQuad_loop:
-
-     .rept   2                                    // body emitted twice: 32 dst bytes per pass
-
-     ldr     r5, [r1], #4                         // two src1 samples, post-increment
-     ldr     r6, [r3], #4                         // two src3 samples
-     pkhbt   r7, r5, r6, LSL #16                  // r7  = low(src1)  | low(src3)  << 16
-     pkhtb   r9, r6, r5, ASR #16                  // r9  = high(src1) | high(src3) << 16
-
-     ldr     r5, [r2], #4                         // two src2 samples
-     ldr     r6, [r4], #4                         // two src4 samples
-     pkhbt   r8, r5, r6, LSL #16                  // r8  = low(src2)  | low(src4)  << 16
-     pkhtb   r10, r6, r5, ASR #16                 // r10 = high(src2) | high(src4) << 16
-
-     stmia   r0!, {r7, r8, r9, r10}               // 4 words out, in register-number order
-
-     .endr
-
-     cmp     r11, r0                              // loop until dst reaches the end pointer
-     bne     .LinterleaveQuad_loop
-
-     pop     {r4-r11}
-
-     bx      lr
-
-
- .END
-
- #endif
|