9 年之前 · a2837e6724
--- a/memcpy_audio.S
+++ b/memcpy_audio.S
@@ -0,0 +1,169 @@
 /* Teensyduino Audio Memcpy
 * Copyright (c) 2016 Frank Bösing
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * 1. The above copyright notice and this permission notice shall be 
 * included in all copies or substantial portions of the Software.
 *
 * 2. If the Software is incorporated into a build system that allows 
 * selection among a list of target devices, then similar target
 * devices manufactured by PJRC.COM must be included in the list of
 * target devices and selectable in the same manner.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

 #if defined(__MK20DX128__) || defined(__MK20DX256__)

 .cpu cortex-m4
 .syntax unified
 .thumb
 .text
 .align	2

 /*
  * void memcpy_tointerleaveLR(short *dst, short *srcL, short *srcR);
  *
  * Interleaves two 16-bit sample streams into one stream of L/R pairs.
  * Each 32-bit output word holds the srcL sample in bits 15:0 and the
  * matching srcR sample in bits 31:16 (i.e. dst[2*i] = srcL[i],
  * dst[2*i+1] = srcR[i] on a little-endian target).
  *
  * The amount of work is fixed: exactly INTERLEAVE_DST_BYTES (256) bytes
  * are written to dst and 128 bytes are read from each source.
  *
  * In:    r0 = dst, r1 = srcL, r2 = srcR
  *        All three are accessed with LDM/STM, so they must be
  *        word-aligned.
  * Out:   none
  * Clobb: r0-r3, r12, flags (caller-saved under AAPCS)
  * Saved: r4-r11 (lr doubles as the end-of-dst pointer and is reloaded
  *        straight into pc on return)
  */
 .equ	INTERLEAVE_DST_BYTES, 256	// TODO: 256 = AUDIO_BLOCK_SAMPLES*2
 .global	memcpy_tointerleaveLR
 .type	memcpy_tointerleaveLR, %function
 .thumb_func
 memcpy_tointerleaveLR:
 	push	{r4-r11,lr}
 	add	lr, r0, #INTERLEAVE_DST_BYTES	// lr = dst end (loop bound)
 	.align	2
 .LloopLR:

 	// Body is unrolled twice: each copy turns 16 + 16 input bytes
 	// into 32 output bytes, so one loop pass emits 64 bytes.
 	.rept	2

 	// Load 2*4 words: left samples in odd regs, right in even regs
 	ldmia	r1!, {r5,r7,r9,r11}
 	ldmia	r2!, {r6,r8,r10,r12}

 	// Each pair of packs consumes one L word and one R word.
 	// Destinations only overwrite inputs that are already consumed,
 	// so the instruction order below must be kept.
 	pkhbt	r3, r5, r6, LSL #16	// r3 = L[0] | R[0] << 16
 	pkhtb	r4, r6, r5, ASR #16	// r4 = L[1] | R[1] << 16

 	pkhbt	r5, r7, r8, LSL #16
 	pkhtb	r6, r8, r7, ASR #16

 	pkhbt	r7, r9, r10, LSL #16
 	pkhtb	r8, r10, r9, ASR #16

 	pkhbt	r9, r11, r12, LSL #16
 	pkhtb	r10, r12, r11, ASR #16

 	// Write 8 words
 	stmia	r0!, {r3-r10}

 	.endr	// original author's count: 5+5+8+9 = 27 cycles per 32 bytes

 	cmp	lr, r0
 	bne	.LloopLR

 	pop	{r4-r11,pc}		// restore and return in one step
 .size	memcpy_tointerleaveLR, .-memcpy_tointerleaveLR


 /*
  * void memcpy_tointerleaveL(short *dst, short *srcL);
  *
  * Expands one 16-bit sample stream into a stream of L/R pairs whose
  * right half is zero. Each 32-bit output word holds the srcL sample in
  * bits 15:0 and zero in bits 31:16 (i.e. dst[2*i] = srcL[i],
  * dst[2*i+1] = 0 on a little-endian target).
  *
  * The amount of work is fixed: exactly INTERLEAVE_DST_BYTES (256) bytes
  * are written to dst and 128 bytes are read from srcL.
  *
  * In:    r0 = dst, r1 = srcL
  *        Both are accessed with LDM/STM, so they must be word-aligned.
  * Out:   none
  * Clobb: r0-r3, r12, flags (caller-saved under AAPCS)
  * Saved: r4-r11
  */
 .equ	INTERLEAVE_DST_BYTES, 256	// TODO: 256 = AUDIO_BLOCK_SAMPLES*2
 .global	memcpy_tointerleaveL
 .type	memcpy_tointerleaveL, %function
 .thumb_func
 memcpy_tointerleaveL:
 	push	{r4-r11}
 	mov	r2, #0				// r2 = constant zero (silent channel)
 	add	r12, r0, #INTERLEAVE_DST_BYTES	// r12 = dst end (loop bound)
 	.align	2
 .LloopL:

 	// Body is unrolled twice: each copy turns 16 input bytes into
 	// 32 output bytes, so one loop pass emits 64 bytes.
 	.rept	2

 	// Load 4 words (8 samples)
 	ldmia	r1!, {r5,r7,r9,r11}

 	// Destinations only overwrite inputs that are already consumed,
 	// so the instruction order below must be kept.
 	pkhbt	r3, r5, r2		// r3 = L[0] | 0 << 16
 	pkhtb	r4, r2, r5, ASR #16	// r4 = L[1] | 0 << 16

 	pkhbt	r5, r7, r2
 	pkhtb	r6, r2, r7, ASR #16

 	pkhbt	r7, r9, r2
 	pkhtb	r8, r2, r9, ASR #16

 	pkhbt	r9, r11, r2
 	pkhtb	r10, r2, r11, ASR #16

 	// Write 8 words
 	stmia	r0!, {r3-r10}

 	.endr

 	cmp	r12, r0
 	bne	.LloopL

 	pop	{r4-r11}
 	bx	lr
 .size	memcpy_tointerleaveL, .-memcpy_tointerleaveL

 /*
  * void memcpy_tointerleaveR(short *dst, short *srcR);
  *
  * Expands one 16-bit sample stream into a stream of L/R pairs whose
  * left half is zero. Each 32-bit output word holds zero in bits 15:0
  * and the srcR sample in bits 31:16 (i.e. dst[2*i] = 0,
  * dst[2*i+1] = srcR[i] on a little-endian target).
  *
  * The amount of work is fixed: exactly INTERLEAVE_DST_BYTES (256) bytes
  * are written to dst and 128 bytes are read from srcR.
  *
  * In:    r0 = dst, r1 = srcR
  *        Both are accessed with LDM/STM, so they must be word-aligned.
  * Out:   none
  * Clobb: r0-r3, r12, flags (caller-saved under AAPCS)
  * Saved: r4-r11
  */
 .equ	INTERLEAVE_DST_BYTES, 256	// TODO: 256 = AUDIO_BLOCK_SAMPLES*2
 .global	memcpy_tointerleaveR
 .type	memcpy_tointerleaveR, %function
 .thumb_func
 memcpy_tointerleaveR:
 	push	{r4-r11}
 	mov	r2, #0				// r2 = constant zero (silent channel)
 	add	r12, r0, #INTERLEAVE_DST_BYTES	// r12 = dst end (loop bound)
 	.align	2
 .LloopR:

 	// Body is unrolled twice: each copy turns 16 input bytes into
 	// 32 output bytes, so one loop pass emits 64 bytes.
 	.rept	2

 	// Load 4 words (8 samples)
 	ldmia	r1!, {r5,r7,r9,r11}

 	// Destinations only overwrite inputs that are already consumed,
 	// so the instruction order below must be kept.
 	pkhbt	r3, r2, r5, LSL #16	// r3 = 0 | R[0] << 16
 	pkhtb	r4, r5, r2		// r4 = 0 | R[1] << 16

 	pkhbt	r5, r2, r7, LSL #16
 	pkhtb	r6, r7, r2

 	pkhbt	r7, r2, r9, LSL #16
 	pkhtb	r8, r9, r2

 	pkhbt	r9, r2, r11, LSL #16
 	pkhtb	r10, r11, r2

 	// Write 8 words
 	stmia	r0!, {r3-r10}

 	.endr

 	cmp	r12, r0
 	bne	.LloopR

 	pop	{r4-r11}
 	bx	lr
 .size	memcpy_tointerleaveR, .-memcpy_tointerleaveR


 .END

 #endif
--- a/memcpy_audio.h
+++ b/memcpy_audio.h
@@ -0,0 +1,44 @@
 /* Teensyduino Audio Memcpy
 * Copyright (c) 2016 Frank Bösing
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * 1. The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * 2. If the Software is incorporated into a build system that allows
 * selection among a list of target devices, then similar target
 * devices manufactured by PJRC.COM must be included in the list of
 * target devices and selectable in the same manner.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

 #ifndef memcpy_audio_h_
 #define memcpy_audio_h_

 #include <stdint.h>	/* int16_t: keeps this header self-contained */

 /*
  * Fixed-size interleave helpers implemented in memcpy_audio.S.
  *
  * Every call writes exactly 256 bytes (128 int16_t values) to dst and
  * reads 128 bytes (64 int16_t values) from each source. The routines
  * use word-wide multiple load/store instructions, so all pointers must
  * be 4-byte aligned.
  *
  * NOTE(review): memcpy_audio.S only assembles these symbols when
  * __MK20DX128__ or __MK20DX256__ is defined; confirm every caller is
  * guarded the same way.
  */
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* dst[2*i] = srcL[i], dst[2*i+1] = srcR[i] */
 void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR);
 /* dst[2*i] = srcL[i], dst[2*i+1] = 0 */
 void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL);
 /* dst[2*i] = 0, dst[2*i+1] = srcR[i] */
 void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR);
 #ifdef __cplusplus
 }
 #endif


 #endif
--- a/output_i2s.cpp
+++ b/output_i2s.cpp
@@ -25,6 +25,7 @@
 */

 #include "output_i2s.h"
 #include "memcpy_audio.h"

 audio_block_t * AudioOutputI2S::block_left_1st = NULL;
 audio_block_t * AudioOutputI2S::block_right_1st = NULL;
@@ -71,14 +72,67 @@ void AudioOutputI2S::begin(void)

 void AudioOutputI2S::isr(void)
 {
 #if defined(KINETISK)
 	int16_t *dest;
 	audio_block_t *blockL, *blockR;
 	uint32_t saddr, offsetL, offsetR;

 	saddr = (uint32_t)(dma.TCD->SADDR);
 	dma.clearInterrupt();
 	if (saddr < (uint32_t)i2s_tx_buffer + sizeof(i2s_tx_buffer) / 2) {
 		// DMA is transmitting the first half of the buffer
 		// so we must fill the second half
 		dest = (int16_t *)&i2s_tx_buffer[AUDIO_BLOCK_SAMPLES/2];
 		if (AudioOutputI2S::update_responsibility) AudioStream::update_all();
 	} else {
 		// DMA is transmitting the second half of the buffer
 		// so we must fill the first half
 		dest = (int16_t *)i2s_tx_buffer;
 	}

 	blockL = AudioOutputI2S::block_left_1st;
 	blockR = AudioOutputI2S::block_right_1st;
 	offsetL = AudioOutputI2S::block_left_offset;
 	offsetR = AudioOutputI2S::block_right_offset;

 	if (blockL && blockR) {
 		memcpy_tointerleaveLR(dest, blockL->data + offsetL, blockR->data + offsetR);
 		offsetL += AUDIO_BLOCK_SAMPLES / 2;
 		offsetR += AUDIO_BLOCK_SAMPLES / 2;
 	} else if (blockL) {
 		memcpy_tointerleaveL(dest, blockL->data + offsetL);
 		offsetL += AUDIO_BLOCK_SAMPLES / 2;
 	} else if (blockR) {
 		memcpy_tointerleaveR(dest, blockR->data + offsetR);
 		offsetR += AUDIO_BLOCK_SAMPLES / 2;
 	} else {
 		memset(dest,0,AUDIO_BLOCK_SAMPLES * 2);
 		return;
 	}

 	if (offsetL < AUDIO_BLOCK_SAMPLES) {
 		AudioOutputI2S::block_left_offset = offsetL;
 	} else {
 		AudioOutputI2S::block_left_offset = 0;
 		AudioStream::release(blockL);
 		AudioOutputI2S::block_left_1st = AudioOutputI2S::block_left_2nd;
 		AudioOutputI2S::block_left_2nd = NULL;
 	}
 	if (offsetR < AUDIO_BLOCK_SAMPLES) {
 		AudioOutputI2S::block_right_offset = offsetR;
 	} else {
 		AudioOutputI2S::block_right_offset = 0;
 		AudioStream::release(blockR);
 		AudioOutputI2S::block_right_1st = AudioOutputI2S::block_right_2nd;
 		AudioOutputI2S::block_right_2nd = NULL;
 	}
 #else
 	const int16_t *src, *end;
 	int16_t *dest;
 	audio_block_t *block;
 	uint32_t saddr, offset;

 #if defined(KINETISK)
 	saddr = (uint32_t)(dma.TCD->SADDR);
 #endif
 	saddr = (uint32_t)(dma.CFG->SAR);
 	dma.clearInterrupt();
 	if (saddr < (uint32_t)i2s_tx_buffer + sizeof(i2s_tx_buffer) / 2) {
 		// DMA is transmitting the first half of the buffer
@@ -93,7 +147,6 @@ void AudioOutputI2S::isr(void)
 		end = (int16_t *)&i2s_tx_buffer[AUDIO_BLOCK_SAMPLES/2];
 	}

 	// TODO: these copy routines could be merged and optimized, maybe in assembly?
 	block = AudioOutputI2S::block_left_1st;
 	if (block) {
 		offset = AudioOutputI2S::block_left_offset;
@@ -141,6 +194,7 @@ void AudioOutputI2S::isr(void)
 			dest += 2;
 		} while (dest < end);
 	}
 #endif
 }