소스 검색

Optimize I2S output memory copy (thanks Frank Bösing)

dds
PaulStoffregen 8 년 전
부모
커밋
a2837e6724
3개의 변경된 파일, 271줄 추가, 4줄 삭제
  1. +169
    -0
      memcpy_audio.S
  2. +44
    -0
      memcpy_audio.h
  3. +58
    -4
      output_i2s.cpp

+ 169
- 0
memcpy_audio.S 파일 보기

@@ -0,0 +1,169 @@
/* Teensyduino Audio Memcpy
* Copyright (c) 2016 Frank Bösing
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* 1. The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* 2. If the Software is incorporated into a build system that allows
* selection among a list of target devices, then similar target
* devices manufactured by PJRC.COM must be included in the list of
* target devices and selectable in the same manner.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#if defined(__MK20DX128__) || defined(__MK20DX256__)

.cpu cortex-m4
.syntax unified
.thumb
.text
.align 2

/*
 * void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR);
 *
 * Interleaves two streams of 16-bit samples into dst. Every 32-bit word
 * written holds one srcL sample in its low halfword and the matching srcR
 * sample in its high halfword.
 *
 * The amount copied is fixed: exactly 256 bytes are written to dst, which
 * consumes 128 bytes (64 samples) from each of srcL and srcR.
 *
 * All three pointers are accessed with ldmia/stmia, so they are expected to
 * be word aligned -- TODO confirm every caller guarantees this.
 */
.global memcpy_tointerleaveLR
.thumb_func
memcpy_tointerleaveLR:

@ r0: dst  (advanced by stmia as words are written)
@ r1: srcL (advanced by ldmia as words are read)
@ r2: srcR (advanced by ldmia as words are read)

// NOTE(review): r3 and r12 are scratch registers under the AAPCS, so saving
// them looks unnecessary -- confirm before trimming the push/pop lists.
push {r3-r12,r14}
add r14,r0,#256 // r14 = address one past the last byte to write. TODO: 256 = AUDIO_BLOCK_SAMPLES*2
.align 2
.loopLR:

// The body below is emitted twice per loop pass (the "offset" value itself is
// never referenced), so one pass writes 64 bytes and the loop runs 4 times.
.irp offset, 1,2

//Load 2*4 words: four words (8 samples) of left, four words (8 samples) of right
ldmia r1!, {r5,r7,r9,r11} //1+4
ldmia r2!, {r6,r8,r10,r12} //1+4

// Each source word carries two samples (even sample low, odd sample high).
// pkhbt builds {R_even : L_even}, pkhtb builds {R_odd : L_odd}.
// The inputs r5..r10 are overwritten only after their last use, so the
// order of the following pairs must not be changed.
pkhbt r3,r5,r6,LSL #16 //1  r3 = low(r5) | low(r6) << 16
pkhtb r4,r6,r5,ASR #16 //1  r4 = high(r5) >> 16 | high(r6)

pkhbt r5,r7,r8,LSL #16 //1
pkhtb r6,r8,r7,ASR #16 //1

pkhbt r7,r9,r10,LSL #16 //1
pkhtb r8,r10,r9,ASR #16 //1

pkhbt r9,r11,r12,LSL #16 //1
pkhtb r10,r12,r11,ASR #16 //1

//Write 8 Words (8 interleaved left/right sample pairs)
stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10} //1+8

.endr //5+5+8+9 = 27 Cycles to interleave 32 bytes.

// Done once dst has advanced by the full 256 bytes.
cmp r14, r0
bne .loopLR

pop {r3-r12,r14}
BX lr


/*
 * void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL);
 *
 * Writes an interleaved stream to dst in which only the left channel carries
 * data. Every 32-bit word written holds one srcL sample in its low halfword
 * and zero in its high halfword.
 *
 * The amount copied is fixed: exactly 256 bytes are written to dst, which
 * consumes 128 bytes (64 samples) from srcL.
 *
 * Both pointers are accessed with ldmia/stmia, so they are expected to be
 * word aligned -- TODO confirm every caller guarantees this.
 */
.global memcpy_tointerleaveL
.thumb_func
memcpy_tointerleaveL:

@ r0: dst  (advanced by stmia as words are written)
@ r1: srcL (advanced by ldmia as words are read)

// NOTE(review): r2, r3 and r12 are scratch registers under the AAPCS, so
// saving them looks unnecessary -- confirm before trimming the push/pop lists.
push {r2-r12}
mov r2, #0 // r2 = constant zero, packed in as the silent right channel
add r12,r0,#256 // r12 = address one past the last byte to write. TODO: 256 = AUDIO_BLOCK_SAMPLES*2
.align 2
.loopL:

// The body below is emitted twice per loop pass (the "offset" value itself is
// never referenced), so one pass writes 64 bytes and the loop runs 4 times.
.irp offset, 1,2

//Load 4 words (8 left samples, two per word)
ldmia r1!, {r5,r7,r9,r11} //1+4

// pkhbt keeps the even sample in the low halfword and takes a zero high
// halfword from r2; pkhtb moves the odd sample down to the low halfword and
// takes a zero high halfword from r2.
// r5, r7 and r9 are overwritten only after their last use, so the order of
// the following pairs must not be changed.
pkhbt r3,r5,r2 //1  r3 = low(r5), high halfword zero
pkhtb r4,r2,r5,ASR #16 //1  r4 = high(r5) >> 16, high halfword zero

pkhbt r5,r7,r2 //1
pkhtb r6,r2,r7,ASR #16 //1

pkhbt r7,r9,r2 //1
pkhtb r8,r2,r9,ASR #16 //1

pkhbt r9,r11,r2 //1
pkhtb r10,r2,r11,ASR #16 //1

//Write 8 Words (8 sample pairs: left data, right zero)
stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10} //1+8

.endr

// Done once dst has advanced by the full 256 bytes.
cmp r12, r0
bne .loopL

pop {r2-r12}
BX lr

/*
 * void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR);
 *
 * Writes an interleaved stream to dst in which only the right channel carries
 * data. Every 32-bit word written holds zero in its low halfword and one srcR
 * sample in its high halfword.
 *
 * The amount copied is fixed: exactly 256 bytes are written to dst, which
 * consumes 128 bytes (64 samples) from srcR.
 *
 * Both pointers are accessed with ldmia/stmia, so they are expected to be
 * word aligned -- TODO confirm every caller guarantees this.
 */
.global memcpy_tointerleaveR
.thumb_func
memcpy_tointerleaveR:

@ r0: dst  (advanced by stmia as words are written)
@ r1: srcR (advanced by ldmia as words are read)

// NOTE(review): r2, r3 and r12 are scratch registers under the AAPCS, so
// saving them looks unnecessary -- confirm before trimming the push/pop lists.
push {r2-r12}
mov r2, #0 // r2 = constant zero, packed in as the silent left channel
add r12,r0,#256 // r12 = address one past the last byte to write. TODO: 256 = AUDIO_BLOCK_SAMPLES*2
.align 2
.loopR:

// The body below is emitted twice per loop pass (the "offset" value itself is
// never referenced), so one pass writes 64 bytes and the loop runs 4 times.
.irp offset, 1,2

//Load 4 words (8 right samples, two per word)
ldmia r1!, {r5,r7,r9,r11}

// pkhbt takes a zero low halfword from r2 and moves the even sample up to the
// high halfword; pkhtb keeps the odd sample in the high halfword and takes a
// zero low halfword from r2.
// r5, r7 and r9 are overwritten only after their last use, so the order of
// the following pairs must not be changed.
pkhbt r3,r2,r5,LSL #16 // r3 = low(r5) << 16, low halfword zero
pkhtb r4,r5,r2 // r4 = high(r5), low halfword zero

pkhbt r5,r2,r7,LSL #16
pkhtb r6,r7,r2

pkhbt r7,r2,r9,LSL #16
pkhtb r8,r9,r2

pkhbt r9,r2,r11,LSL #16
pkhtb r10,r11,r2

//Write 8 Words (8 sample pairs: left zero, right data)
stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10}

.endr

// Done once dst has advanced by the full 256 bytes.
cmp r12, r0
bne .loopR

pop {r2-r12}
BX lr


.END

#endif

+ 44
- 0
memcpy_audio.h 파일 보기

@@ -0,0 +1,44 @@
/* Teensyduino Audio Memcpy
* Copyright (c) 2016 Frank Bösing
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* 1. The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* 2. If the Software is incorporated into a build system that allows
* selection among a list of target devices, then similar target
* devices manufactured by PJRC.COM must be included in the list of
* target devices and selectable in the same manner.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#ifndef memcpy_audio_h_
#define memcpy_audio_h_

/* int16_t is used in the prototypes below; include it here so this header
 * compiles on its own instead of relying on the includer's include order. */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Interleaving copy routines implemented in assembly in memcpy_audio.S.
 *
 * The implementations copy a fixed amount that is hard-coded in the assembly:
 * each call writes 256 bytes to dst and reads 128 bytes (64 samples) from
 * every source pointer it is given. They use multi-word load/store
 * instructions, so all pointers are expected to be 32-bit aligned.
 *
 * memcpy_audio.S only assembles these routines when __MK20DX128__ or
 * __MK20DX256__ is defined; on other targets the symbols are not provided.
 */

/* dst receives pairs: srcL sample in the low halfword, srcR sample in the high halfword. */
void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR);

/* dst receives pairs: srcL sample in the low halfword, zero in the high halfword. */
void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL);

/* dst receives pairs: zero in the low halfword, srcR sample in the high halfword. */
void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR);

#ifdef __cplusplus
}
#endif


#endif

+ 58
- 4
output_i2s.cpp 파일 보기

@@ -25,6 +25,7 @@
*/

#include "output_i2s.h"
#include "memcpy_audio.h"

audio_block_t * AudioOutputI2S::block_left_1st = NULL;
audio_block_t * AudioOutputI2S::block_right_1st = NULL;
@@ -71,14 +72,67 @@ void AudioOutputI2S::begin(void)

void AudioOutputI2S::isr(void)
{
#if defined(KINETISK)
int16_t *dest;
audio_block_t *blockL, *blockR;
uint32_t saddr, offsetL, offsetR;

saddr = (uint32_t)(dma.TCD->SADDR);
dma.clearInterrupt();
if (saddr < (uint32_t)i2s_tx_buffer + sizeof(i2s_tx_buffer) / 2) {
// DMA is transmitting the first half of the buffer
// so we must fill the second half
dest = (int16_t *)&i2s_tx_buffer[AUDIO_BLOCK_SAMPLES/2];
if (AudioOutputI2S::update_responsibility) AudioStream::update_all();
} else {
// DMA is transmitting the second half of the buffer
// so we must fill the first half
dest = (int16_t *)i2s_tx_buffer;
}

blockL = AudioOutputI2S::block_left_1st;
blockR = AudioOutputI2S::block_right_1st;
offsetL = AudioOutputI2S::block_left_offset;
offsetR = AudioOutputI2S::block_right_offset;

if (blockL && blockR) {
memcpy_tointerleaveLR(dest, blockL->data + offsetL, blockR->data + offsetR);
offsetL += AUDIO_BLOCK_SAMPLES / 2;
offsetR += AUDIO_BLOCK_SAMPLES / 2;
} else if (blockL) {
memcpy_tointerleaveL(dest, blockL->data + offsetL);
offsetL += AUDIO_BLOCK_SAMPLES / 2;
} else if (blockR) {
memcpy_tointerleaveR(dest, blockR->data + offsetR);
offsetR += AUDIO_BLOCK_SAMPLES / 2;
} else {
memset(dest,0,AUDIO_BLOCK_SAMPLES * 2);
return;
}

if (offsetL < AUDIO_BLOCK_SAMPLES) {
AudioOutputI2S::block_left_offset = offsetL;
} else {
AudioOutputI2S::block_left_offset = 0;
AudioStream::release(blockL);
AudioOutputI2S::block_left_1st = AudioOutputI2S::block_left_2nd;
AudioOutputI2S::block_left_2nd = NULL;
}
if (offsetR < AUDIO_BLOCK_SAMPLES) {
AudioOutputI2S::block_right_offset = offsetR;
} else {
AudioOutputI2S::block_right_offset = 0;
AudioStream::release(blockR);
AudioOutputI2S::block_right_1st = AudioOutputI2S::block_right_2nd;
AudioOutputI2S::block_right_2nd = NULL;
}
#else
const int16_t *src, *end;
int16_t *dest;
audio_block_t *block;
uint32_t saddr, offset;

#if defined(KINETISK)
saddr = (uint32_t)(dma.TCD->SADDR);
#endif
saddr = (uint32_t)(dma.CFG->SAR);
dma.clearInterrupt();
if (saddr < (uint32_t)i2s_tx_buffer + sizeof(i2s_tx_buffer) / 2) {
// DMA is transmitting the first half of the buffer
@@ -93,7 +147,6 @@ void AudioOutputI2S::isr(void)
end = (int16_t *)&i2s_tx_buffer[AUDIO_BLOCK_SAMPLES/2];
}

// TODO: these copy routines could be merged and optimized, maybe in assembly?
block = AudioOutputI2S::block_left_1st;
if (block) {
offset = AudioOutputI2S::block_left_offset;
@@ -141,6 +194,7 @@ void AudioOutputI2S::isr(void)
dest += 2;
} while (dest < end);
}
#endif
}



Loading…
취소
저장