@@ -164,6 +164,47 @@ | |||
BX lr | |||
/* void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1, const int16_t *src2, const int16_t *src3, const int16_t *src4) */ | |||
@ Interleaves four int16_t streams into dst.
@ Each output frame is four halfwords in the order src1, src3, src2, src4
@ (lowest address first, assuming a little-endian target - TODO confirm).
@ Exactly 512 bytes are written to dst and 128 bytes are read from each
@ source; the length is hard-coded, see the TODO on the add below.
@ NOTE(review): dst is written with stmia, so it presumably has to be
@ word aligned - confirm against the callers.
.global memcpy_tointerleaveQuad | |||
.thumb_func | |||
memcpy_tointerleaveQuad: | |||
@ r0: dst | |||
@ r1: src1 | |||
@ r2: src2 | |||
@ r3: src3 | |||
@ stack: src4 (5th argument; loaded into r4 below) | |||
@ Save r4-r11: 8 registers, so the stack pointer moves down by 32 bytes.
push {r4-r11} | |||
ldr r4, [sp, #(0+32)] // src4 (5th argument) is passed on the stack; +32 skips the 8 registers pushed above | |||
add r11,r0,#512 // r11 = end of dst. TODO: 512 = AUDIO_BLOCK_SAMPLES*4 | |||
@ Align the loop entry to a 4-byte boundary (2^2).
.align 2 | |||
.loopQuad: | |||
@ The body is emitted twice at assembly time; the offset symbol is not
@ referenced inside it. Each copy consumes 2 samples per source and
@ stores 16 bytes, so one pass of the loop stores 32 bytes.
.irp offset, 1,2 | |||
@ r5 = two samples of src1, r6 = two samples of src3 (post-increment by 4)
ldr r5, [r1],4 | |||
ldr r6, [r3],4 | |||
@ r7 = low half of r5 in bits 15:0, low half of r6 in bits 31:16
pkhbt r7,r5,r6,LSL #16 | |||
@ r9 = high half of r5 in bits 15:0, high half of r6 in bits 31:16
pkhtb r9,r6,r5,ASR #16 | |||
@ Same packing for src2 (r5) and src4 (r6) into r8 and r10
ldr r5, [r2],4 | |||
ldr r6, [r4],4 | |||
pkhbt r8,r5,r6,LSL #16 | |||
pkhtb r10,r6,r5,ASR #16 | |||
@ Store r7,r8,r9,r10 in ascending register order and advance dst by 16:
@ frame 0 = src1,src3,src2,src4 then frame 1 = src1,src3,src2,src4
stmia r0!, {r7-r10} | |||
.endr | |||
@ Repeat until dst reaches the end pointer computed above
cmp r11, r0 | |||
bne .loopQuad | |||
pop {r4-r11} | |||
BX lr | |||
.END | |||
#endif |
@@ -36,6 +36,8 @@ extern "C" { | |||
/* Interleaving copy helpers declared with C linkage.
 * NOTE(review): the implementations are not visible from this header;
 * presumably they place the left and/or right int16_t samples into
 * alternating slots of dst - confirm against the assembly source. */
void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR); | |||
void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL); | |||
void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR); | |||
/* Interleaves four int16_t sample streams into dst.
 * Each output frame holds one sample from every source, in the order
 * src1, src3, src2, src4. The assembly implementation writes a fixed
 * 512 bytes to dst (its comment equates this to AUDIO_BLOCK_SAMPLES*4)
 * and reads 128 bytes from each source; there is no length parameter.
 * NOTE(review): dst is presumably required to be 4-byte aligned because
 * the implementation stores whole words - confirm against callers. */
void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1, const int16_t *src2, | |||
const int16_t *src3, const int16_t *src4); | |||
#ifdef __cplusplus | |||
} | |||
#endif |
@@ -107,12 +107,16 @@ void AudioOutputI2SQuad::isr(void) | |||
src4 = (block_ch4_1st) ? block_ch4_1st->data + ch4_offset : zeros; | |||
// Fast 4-way interleave (output order: src1, src3, src2, src4); the #else branch is the equivalent plain C loop. | |||
#if 1 | |||
memcpy_tointerleaveQuad(dest, src1, src2, src3, src4); | |||
#else | |||
for (int i=0; i < AUDIO_BLOCK_SAMPLES/2; i++) { | |||
*dest++ = *src1++; | |||
*dest++ = *src3++; | |||
*dest++ = *src2++; | |||
*dest++ = *src4++; | |||
} | |||
#endif | |||
if (block_ch1_1st) { | |||
if (ch1_offset == 0) { |