|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332 |
-
-
/*
 * memcpy for ARMv7-M / ARMv7E-M (Thumb-2, GNU as unified syntax, run
 * through the C preprocessor).
 *
 * Prototype: void *memcpy (void *dst, const void *src, size_t len);
 *
 * In:    r0 = dst, r1 = src, r2 = len in bytes
 * Out:   r0 = dst as passed in
 * Clobb: r1, r2, r3, ip, flags.  r4 and r5 are pushed/popped around the
 *        only path that uses them.
 * Stack: one word (saved r0) when __ARM_FEATURE_UNALIGNED is not defined,
 *        plus two words (r4, r5) on the misaligned-source path.
 *
 * Strategy:
 *   1. If dst and src are both word aligned, copy blocks directly;
 *      otherwise take the misaligned path, which aligns dst first.
 *   2. Copy blocks of __OPT_BIG_BLOCK_SIZE bytes.
 *   3. Copy blocks of __OPT_MID_BLOCK_SIZE bytes.
 *   4. Copy word by word.
 *   5. Copy the final 0..3 bytes.
 *
 * Throughout the block loops r2 is kept biased by minus the current block
 * size, so the carry flag left by SUBS/ADDS doubles as the loop test
 * (BHS = no borrow = at least one more block, BLO = borrow).
 *
 * Tunables (both are byte counts):
 *   __OPT_BIG_BLOCK_SIZE   16, 32 or 64   (default 64)
 *   __OPT_MID_BLOCK_SIZE   8 or 16        (default 16)
 */

#if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)

#ifndef __OPT_BIG_BLOCK_SIZE
#define __OPT_BIG_BLOCK_SIZE (4 * 16)
#endif

#ifndef __OPT_MID_BLOCK_SIZE
#define __OPT_MID_BLOCK_SIZE (4 * 4)
#endif

/* BEGIN_UNROLL_xxx opens an .irp that repeats the enclosed lines once per
   word of the block, binding \offset to that word's byte offset.
   END_UNROLL closes it.  */
#if __OPT_BIG_BLOCK_SIZE == 16
#define BEGIN_UNROLL_BIG_BLOCK \
  .irp offset, 0,4,8,12
#elif __OPT_BIG_BLOCK_SIZE == 32
#define BEGIN_UNROLL_BIG_BLOCK \
  .irp offset, 0,4,8,12,16,20,24,28
#elif __OPT_BIG_BLOCK_SIZE == 64
#define BEGIN_UNROLL_BIG_BLOCK \
  .irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
#else
#error "Illegal __OPT_BIG_BLOCK_SIZE"
#endif

#if __OPT_MID_BLOCK_SIZE == 8
#define BEGIN_UNROLL_MID_BLOCK \
  .irp offset, 0,4
#elif __OPT_MID_BLOCK_SIZE == 16
#define BEGIN_UNROLL_MID_BLOCK \
  .irp offset, 0,4,8,12
#else
#error "Illegal __OPT_MID_BLOCK_SIZE"
#endif

#define END_UNROLL .endr

        .syntax unified
        .text
        .align  2
        .global memcpy
        .thumb
        .thumb_func
        .type   memcpy, %function
memcpy:
        /* r0 = dst, r1 = src, r2 = len.  Remember dst for the return
           value.  */
#ifdef __ARM_FEATURE_UNALIGNED
        /* ip is not used as scratch anywhere on the unaligned-capable
           paths, so it can hold the original dst.  */
        mov     ip, r0
#else
        /* The misaligned-source path below uses ip as scratch, so the
           original dst goes on the stack instead.  */
        push    {r0}
#endif
        orr     r3, r1, r0
        ands    r3, r3, #3              /* any low bit set in dst or src?  */
        bne     .Lmisaligned_copy

.Lbig_block:
        subs    r2, __OPT_BIG_BLOCK_SIZE        /* bias count by -BIG  */
        blo     .Lmid_block                     /* fewer than BIG bytes  */

        /* Big-block loop: one unrolled block per iteration.  */
        .align  2
.Lbig_block_loop:
        BEGIN_UNROLL_BIG_BLOCK
#ifdef __ARM_ARCH_7EM__
        ldr     r3, [r1], #4
        str     r3, [r0], #4
        END_UNROLL
#else /* __ARM_ARCH_7M__ */
        ldr     r3, [r1, \offset]
        str     r3, [r0, \offset]
        END_UNROLL
        adds    r0, __OPT_BIG_BLOCK_SIZE
        adds    r1, __OPT_BIG_BLOCK_SIZE
#endif
        subs    r2, __OPT_BIG_BLOCK_SIZE
        bhs     .Lbig_block_loop

.Lmid_block:
        /* Re-bias the count from -BIG to -MID.  */
        adds    r2, __OPT_BIG_BLOCK_SIZE - __OPT_MID_BLOCK_SIZE
        blo     .Lcopy_word_by_word

        /* Mid-block loop.  */
        .align  2
.Lmid_block_loop:
        BEGIN_UNROLL_MID_BLOCK
#ifdef __ARM_ARCH_7EM__
        ldr     r3, [r1], #4
        str     r3, [r0], #4
        END_UNROLL
#else /* __ARM_ARCH_7M__ */
        ldr     r3, [r1, \offset]
        str     r3, [r0, \offset]
        END_UNROLL
        adds    r0, __OPT_MID_BLOCK_SIZE
        adds    r1, __OPT_MID_BLOCK_SIZE
#endif
        subs    r2, __OPT_MID_BLOCK_SIZE
        bhs     .Lmid_block_loop

.Lcopy_word_by_word:
        /* Re-bias the count from -MID to -4.  */
        adds    r2, __OPT_MID_BLOCK_SIZE - 4
        blo     .Lcopy_less_than_4

        /* Single-word loop.  */
        .align  2
.Lcopy_word_by_word_loop:
        ldr     r3, [r1], #4
        str     r3, [r0], #4
        subs    r2, #4
        bhs     .Lcopy_word_by_word_loop

.Lcopy_less_than_4:
        /* On entry r2 = (bytes left) - 4; undo the bias so r2 = 0..3.  */
        adds    r2, #4
        beq     .Ldone

        /* Move bit 0 of the count into bit 31 (result non-zero, so NE)
           and bit 1 into the carry flag.  */
        lsls    r2, r2, #31
        itt     ne
        ldrbne  r3, [r1], #1            /* odd count: copy one byte  */
        strbne  r3, [r0], #1

        bcc     .Ldone                  /* bit 1 clear: nothing left  */
#ifdef __ARM_FEATURE_UNALIGNED
        ldrh    r3, [r1]
        strh    r3, [r0]
#else
        /* Pointers may be odd here, so copy the pair byte by byte.  */
        ldrb    r3, [r1]
        strb    r3, [r0]
        ldrb    r3, [r1, #1]
        strb    r3, [r0, #1]
#endif /* __ARM_FEATURE_UNALIGNED */

.Ldone:
#ifdef __ARM_FEATURE_UNALIGNED
        mov     r0, ip
#else
        pop     {r0}
#endif
        bx      lr

        .align  2
.Lmisaligned_copy:
#ifdef __ARM_FEATURE_UNALIGNED
        /* With unaligned access available there is no separate
           "dst aligned" stage: once dst has been aligned the ordinary
           block copy is used even if src is still misaligned.  */
#define Ldst_aligned Lbig_block

        /* Short copies go straight to the byte loop; this also keeps the
           up-to-3-byte dst alignment below from underflowing r2.  */
        cmp     r2, #8
        blo     .Lbyte_copy

        /* src already word aligned (low two bits zero): block copy.  */
        lsls    r3, r1, #30
        beq     .Ldst_aligned
#else
        /* Short copies go straight to the byte loop.  The code below
           needs enough bytes to align dst (up to 3) and still store at
           least one whole word afterwards.  */
        cmp     r2, #12
        blo     .Lbyte_copy
#endif /* __ARM_FEATURE_UNALIGNED */

        /* Align dst only; src alignment is dealt with afterwards.  */
        ands    r3, r0, #3
        beq     .Ldst_aligned

        rsb     r3, #4                  /* r3 = bytes needed to align dst  */
        subs    r2, r3

        /* Same bit trick as the tail: bit 0 -> NE, bit 1 -> carry.  */
        lsls    r3, r3, #31
        itt     ne
        ldrbne  r3, [r1], #1
        strbne  r3, [r0], #1

        bcc     .Ldst_aligned

#ifdef __ARM_FEATURE_UNALIGNED
        ldrh    r3, [r1], #2
        strh    r3, [r0], #2
        b       .Ldst_aligned
#else
        ldrb    r3, [r1], #1
        strb    r3, [r0], #1
        ldrb    r3, [r1], #1
        strb    r3, [r0], #1

        /* dst is now word aligned.  */
.Ldst_aligned:
        /* If src is aligned as well, both had the same misalignment:
           use the ordinary block copy.  */
        ands    r3, r1, #3
        beq     .Lbig_block

        /* dst aligned, src misaligned by r3 = 1..3 bytes.  Read aligned
           words from src and stitch each output word together from two
           consecutive input words.  */
        push    {r4, r5}
        subs    r2, #4                  /* bias count by -4  */

        /* Round r1 down to a word boundary.  ip = 4 - misalignment is the
           amount to subtract after the loop so that r1 again points at
           the next uncopied source byte.  */
        subs    r1, r3
        rsb     ip, r3, #4

        /* Pre-load the first (partially needed) word.  */
        ldr     r4, [r1], #4

        cmp     r3, #2
        beq     .Lmisaligned_copy_2_2
        cmp     r3, #3
        beq     .Lmisaligned_copy_3_1

        /* mis_src_copy shift
           shift = 8 * (src misalignment in bytes).
           Loop invariant: r4 = previously loaded aligned source word,
           r1 = address of the next aligned source word, r0 = aligned dst,
           r2 = (bytes left) - 4.
           Each pass drops the already-consumed bytes of r4, fills the gap
           with the leading bytes of the next word and stores one word.
           Which end of a register holds the lowest-addressed byte depends
           on endianness, hence the two shift directions.
           Clobbers r3, r4, r5 and flags.  */
        .macro mis_src_copy shift
1:
#if defined (__ARM_BIG_ENDIAN) || defined (__ARMEB__)
        lsls    r4, r4, \shift
#else
        lsrs    r4, r4, \shift
#endif
        ldr     r3, [r1], #4
#if defined (__ARM_BIG_ENDIAN) || defined (__ARMEB__)
        lsrs    r5, r3, 32-\shift
#else
        lsls    r5, r3, 32-\shift
#endif
        orr     r4, r4, r5
        str     r4, [r0], #4
        mov     r4, r3
        subs    r2, #4
        bhs     1b
        .endm

.Lmisaligned_copy_1_3:
        mis_src_copy shift=8
        b       .Lsrc_misaligned_tail

.Lmisaligned_copy_3_1:
        mis_src_copy shift=24
        b       .Lsrc_misaligned_tail

.Lmisaligned_copy_2_2:
        mis_src_copy shift=16

.Lsrc_misaligned_tail:
        adds    r2, #4                  /* undo bias: r2 = 0..3 bytes left  */
        subs    r1, ip                  /* back to the true source position  */
        pop     {r4, r5}
        /* Fall through: .Lbyte_copy sends counts below 4 to the tail.  */

#endif /* __ARM_FEATURE_UNALIGNED */

.Lbyte_copy:
        subs    r2, #4
        blo     .Lcopy_less_than_4

        /* r2 = len - 4 >= 0.  The loop copies r2 + 1 bytes: LDRB/STRB leave
           the flags from SUBS intact, so the byte for the iteration that
           borrows is still copied before BHS falls through.  */
.Lbyte_copy_loop:
        subs    r2, #1
        ldrb    r3, [r1], #1
        strb    r3, [r0], #1
        bhs     .Lbyte_copy_loop

        /* Exactly three bytes remain.  */
        ldrb    r3, [r1]
        strb    r3, [r0]
        ldrb    r3, [r1, #1]
        strb    r3, [r0, #1]
        ldrb    r3, [r1, #2]
        strb    r3, [r0, #2]

#ifdef __ARM_FEATURE_UNALIGNED
        mov     r0, ip
#else
        pop     {r0}
#endif
        bx      lr

        .size   memcpy, .-memcpy
#endif
|