BX lr | BX lr | ||||
/*
 * void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1, const int16_t *src2,
 *                              const int16_t *src3, const int16_t *src4)
 *
 * Interleaves four streams of 16-bit samples into dst. For every sample
 * index n the packed output holds, from low halfword to high halfword and
 * in store order: src1[n], src3[n], src2[n], src4[n]. This is the same
 * order as the portable C loop used by the caller (src1, src3, src2, src4),
 * assuming a little-endian target.
 *
 * Exactly QUAD_DST_BYTES bytes are written to dst and QUAD_DST_BYTES/4
 * bytes are read from each source; there is no length argument.
 *
 * In:     r0 = dst, r1 = src1, r2 = src2, r3 = src3
 *         [sp] on entry = src4 (fifth argument, passed on the stack)
 * Out:    none (r0-r3 are advanced past the data they point to)
 * Saved:  r4-r11 are pushed on entry and restored before returning
 * Flags:  modified by the loop compare
 * NOTE(review): sources are read with 32-bit loads and dst is written with
 * stmia, so callers presumably pass word-aligned buffers - confirm.
 */
.equ QUAD_DST_BYTES, 512	@ TODO: derive from AUDIO_BLOCK_SAMPLES (512 = AUDIO_BLOCK_SAMPLES*4)
.global memcpy_tointerleaveQuad
.thumb_func
memcpy_tointerleaveQuad:
	push	{r4-r11}
	ldr	r4, [sp, #32]			@ r4 = src4: fifth argument, found above the 8 registers (32 bytes) just pushed
	add	r11, r0, #QUAD_DST_BYTES	@ r11 = one past the last output byte (loop end marker)
	.align 2
.loopQuad:
	@ Each pass of the repeated body consumes two samples from every source
	@ and stores eight output samples (16 bytes). Two passes per loop trip.
	.rept 2
	ldr	r5, [r1], #4			@ r5 = src1[n+1]:src1[n]
	ldr	r6, [r3], #4			@ r6 = src3[n+1]:src3[n]
	pkhbt	r7, r5, r6, LSL #16		@ r7 = src3[n]:src1[n]
	pkhtb	r9, r6, r5, ASR #16		@ r9 = src3[n+1]:src1[n+1]
	ldr	r5, [r2], #4			@ r5 = src2[n+1]:src2[n]
	ldr	r6, [r4], #4			@ r6 = src4[n+1]:src4[n]
	pkhbt	r8, r5, r6, LSL #16		@ r8 = src4[n]:src2[n]
	pkhtb	r10, r6, r5, ASR #16		@ r10 = src4[n+1]:src2[n+1]
	stmia	r0!, {r7-r10}			@ store order r7, r8, r9, r10
	.endr
	cmp	r11, r0				@ done when dst reaches the end marker
	bne	.loopQuad
	pop	{r4-r11}
	bx	lr
.END | .END | ||||
#endif | #endif |
void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR); | void memcpy_tointerleaveLR(int16_t *dst, const int16_t *srcL, const int16_t *srcR); | ||||
void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL); | void memcpy_tointerleaveL(int16_t *dst, const int16_t *srcL); | ||||
void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR); | void memcpy_tointerleaveR(int16_t *dst, const int16_t *srcR); | ||||
/* Interleave four int16_t sample streams into dst, one sample from each per
 * group, in the order src1, src3, src2, src4. Implemented in assembly; the
 * amount copied is fixed by the implementation (no length argument). */
void memcpy_tointerleaveQuad(int16_t *dst, const int16_t *src1, const int16_t *src2, | |||||
const int16_t *src3, const int16_t *src4); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif |
src4 = (block_ch4_1st) ? block_ch4_1st->data + ch4_offset : zeros; | src4 = (block_ch4_1st) ? block_ch4_1st->data + ch4_offset : zeros; | ||||
// 4-way interleaved copy: the assembly helper memcpy_tointerleaveQuad is used; the #else loop is the portable reference (order src1, src3, src2, src4).
#if 1 | |||||
memcpy_tointerleaveQuad(dest, src1, src2, src3, src4); | |||||
#else | |||||
for (int i=0; i < AUDIO_BLOCK_SAMPLES/2; i++) { | for (int i=0; i < AUDIO_BLOCK_SAMPLES/2; i++) { | ||||
*dest++ = *src1++; | *dest++ = *src1++; | ||||
*dest++ = *src3++; | *dest++ = *src3++; | ||||
*dest++ = *src2++; | *dest++ = *src2++; | ||||
*dest++ = *src4++; | *dest++ = *src4++; | ||||
} | } | ||||
#endif | |||||
if (block_ch1_1st) { | if (block_ch1_1st) { | ||||
if (ch1_offset == 0) { | if (ch1_offset == 0) { |