| @@ -164,6 +164,47 @@ | |||
| BX lr | |||
| /* void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1, const int16_t *src2, const int16_t *src3, const int16_t *src4) | |||
| * | |||
| * Interleaves four streams of 16-bit samples into dst. For every sample index n the | |||
| * halfword packing below emits src1[n], src3[n], src2[n], src4[n], in that order | |||
| * (low halfword first, i.e. ascending addresses on a little-endian target). | |||
| * Exactly 512 bytes are written to dst, so 64 samples (128 bytes) are consumed from | |||
| * each source. Sources are read with 32-bit loads and dst is written with stmia. | |||
| * NOTE(review): stmia presumably requires a word-aligned dst - confirm against callers. | |||
| */ | |||
| .global memcpy_tointerleaveQuad | |||
| .thumb_func | |||
| memcpy_tointerleaveQuad: | |||
| @ r0: dst | |||
| @ r1: src1 | |||
| @ r2: src2 | |||
| @ r3: src3 | |||
| @ r4: src4 /* not a register argument: loaded from the stack below */ | |||
| push {r4-r11} /* 8 registers = 32 bytes; restored by the pop before returning */ | |||
| ldr r4, [sp, #(0+32)] // src4 is the 5th argument and is passed on the stack; +32 skips the 8 registers just pushed | |||
| add r11,r0,#512 // r11 = dst + 512 = end of output. TODO: 512 = AUDIO_BLOCK_SAMPLES*4 | |||
| .align 2 /* align the loop entry (on ARM gas, .align 2 means a 4-byte boundary) */ | |||
| .loopQuad: | |||
| .irp offset, 1,2 /* emit the body twice (manual unroll); the offset symbol itself is unused */ | |||
| ldr r5, [r1],4 /* r5 = two src1 samples; r1 post-incremented by 4 */ | |||
| ldr r6, [r3],4 /* r6 = two src3 samples */ | |||
| pkhbt r7,r5,r6,LSL #16 /* r7: low = src1[n], high = src3[n] */ | |||
| pkhtb r9,r6,r5,ASR #16 /* r9: low = src1[n+1], high = src3[n+1] */ | |||
| ldr r5, [r2],4 /* r5 = two src2 samples */ | |||
| ldr r6, [r4],4 /* r6 = two src4 samples */ | |||
| pkhbt r8,r5,r6,LSL #16 /* r8: low = src2[n], high = src4[n] */ | |||
| pkhtb r10,r6,r5,ASR #16 /* r10: low = src2[n+1], high = src4[n+1] */ | |||
| stmia r0!, {r7-r10} /* store r7,r8,r9,r10 in ascending order: 16 bytes, dst advances */ | |||
| .endr | |||
| cmp r11, r0 /* reached dst + 512? */ | |||
| bne .loopQuad /* each pass advances dst by 32 bytes, so the end address is hit exactly */ | |||
| pop {r4-r11} | |||
| BX lr | |||
| .END | |||
| #endif | |||
| @@ -36,6 +36,8 @@ extern "C" { | |||
| void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR); | |||
| void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL); | |||
| void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR); | |||
| void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1, const int16_t *src2, | |||
| const int16_t *src3, const int16_t *src4); /* assembly routine; per-sample output order is src1, src3, src2, src4 */ | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -107,12 +107,16 @@ void AudioOutputI2SQuad::isr(void) | |||
| src4 = (block_ch4_1st) ? block_ch4_1st->data + ch4_offset : zeros; | |||
| // 4-way interleave: assembly routine by default; the #else branch is the plain C loop. | |||
| #if 1 | |||
| memcpy_tointerleaveQuad(dest, src1, src2, src3, src4); | |||
| #else | |||
| for (int i=0; i < AUDIO_BLOCK_SAMPLES/2; i++) { | |||
| *dest++ = *src1++; | |||
| *dest++ = *src3++; | |||
| *dest++ = *src2++; | |||
| *dest++ = *src4++; | |||
| } | |||
| #endif | |||
| if (block_ch1_1st) { | |||
| if (ch1_offset == 0) { | |||