|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- #ifndef __INC_M0_CLOCKLESS_H
- #define __INC_M0_CLOCKLESS_H
-
- struct M0ClocklessData { /* Working state handed to showLedData() and read/updated by its inline assembly.
-  * The assembly addresses the members by hard-coded byte offsets (d at 0, e at 3,
-  * adj at 6, s at 8), so the member order and sizes must stay exactly as declared.
-  * The three-element arrays are indexed with RO(n), a macro defined outside this
-  * file (presumably the colour-channel order - confirm against its definition). */
- uint8_t d[3]; // dither value: added (saturating) to a non-zero pixel byte, then rewritten by the asm as e - d
- uint8_t e[3]; // value the asm subtracts d from (d = e - d) each time the dither is adjusted
- uint8_t adj; // number of bytes the asm adds to the led pointer after each pixel (loaded from offset 6)
- uint8_t pad; // not referenced by the visible code; explicit filler so s[] begins at offset 8
- uint32_t s[3]; // scale factor: loaded as a 32-bit word and multiplied with the dithered byte
- };
-
-
- template<int HI_OFFSET, int LO_OFFSET, int T1, int T2, int T3, EOrder RGB_ORDER, int WAIT_TIME>int
- showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, uint32_t num_leds, struct M0ClocklessData *pData) {
- // Clocks num_leds pixels (three bytes each, most significant bit first) out by
- // storing _bitmask to two registers relative to _port, using cycle-counted
- // Thumb assembly. While the bits of one byte are being sent, the next byte is
- // loaded, dithered and scaled in the gaps between the pin writes.
- //
- // Template parameters:
- //   HI_OFFSET, LO_OFFSET - byte offsets from _port of the registers that are
- //                          written with _bitmask to drive the line high / low.
- //   T1, T2, T3           - cycle budgets of the three phases of every bit:
- //                          after the line goes high, after the conditional
- //                          drop for a 0 bit, and after the unconditional drop.
- //   RGB_ORDER, WAIT_TIME - not referenced directly in this body (RGB_ORDER is
- //                          presumably consumed by the RO() macro, which is
- //                          defined elsewhere - confirm).
- // Parameters:
- //   _port     - base address used for the high/low register stores.
- //   _bitmask  - value stored to those registers.
- //   _leds     - first byte of the pixel data; advanced by pData->adj per pixel.
- //   num_leds  - number of pixels to send.
- //   pData     - dither/scale state; d[] is updated in place, and s[] is
- //               incremented on entry when FASTLED_SCALE8_FIXED == 1.
- // Returns num_leds once every pixel has been sent, or 0 when nothing was sent
- // to completion (num_leds == 0, or the interrupt-enabled SysTick variant gave
- // up on the frame because an interrupt held it off for too long).
-
- // Every loop form below sends a pixel first and only then decrements and
- // tests the counter, so a zero count would emit one pixel, wrap the counter
- // to 0xFFFFFFFF and keep reading past the end of the buffer. Bail out early.
- if (num_leds == 0) {
- return 0;
- }
-
- // Lo register variables
- register uint32_t scratch=0;
- register struct M0ClocklessData *base = pData;
- register volatile uint32_t *port = _port;
- register uint32_t d=0;
- register uint32_t counter=num_leds;
- register uint32_t bn=0;
- register uint32_t b=0;
- register uint32_t bitmask = _bitmask;
-
- // high register variable
- register const uint8_t *leds = _leds;
- #if (FASTLED_SCALE8_FIXED == 1)
- // Bump each scale factor by one before it is used as a multiplier.
- pData->s[0]++;
- pData->s[1]++;
- pData->s[2]++;
- #endif
- asm __volatile__ (
- ///////////////////////////////////////////////////////////////////////////
- //
- // asm macro definitions - used to assemble the clockless output
- //
- // The definitions are guarded by fl_delay_def so that they are only emitted
- // once, however many times this template is instantiated.
- ".ifnotdef fl_delay_def;"
- #ifdef FASTLED_ARM_M0_PLUS
- // m0pad is a single nop on the M0+ ...
- " .set fl_is_m0p, 1;"
- " .macro m0pad;"
- " nop;"
- " .endm;"
- #else
- // ... and expands to nothing on the plain M0.
- " .set fl_is_m0p, 0;"
- " .macro m0pad;"
- " .endm;"
- #endif
- " .set fl_delay_def, 1;"
- // cycles per pass of the delay loop: 4 on the M0, 3 on the M0+
- " .set fl_delay_mod, 4;"
- " .if fl_is_m0p == 1;"
- " .set fl_delay_mod, 3;"
- " .endif;"
- // fl_delay: burn dtime cycles - (dtime % fl_delay_mod) nops followed by
- // (dtime / fl_delay_mod) passes of a countdown loop in reg.
- " .macro fl_delay dtime, reg=r0;"
- " .if (\\dtime > 0);"
- " .set dcycle, (\\dtime / fl_delay_mod);"
- " .set dwork, (dcycle * fl_delay_mod);"
- " .set drem, (\\dtime - dwork);"
- " .rept (drem);"
- " nop;"
- " .endr;"
- " .if dcycle > 0;"
- " mov \\reg, #dcycle;"
- " delayloop_\\@:;"
- " sub \\reg, #1;"
- " bne delayloop_\\@;"
- " .if fl_is_m0p == 0;"
- " nop;"
- " .endif;"
- " .endif;"
- " .endif;"
- " .endm;"
-
- // mod_delay: delay for dtime minus the (b1 + b2) cycles already spent on
- // other work; emits nothing when that work already fills the budget.
- " .macro mod_delay dtime,b1,b2,reg;"
- " .set adj, (\\b1 + \\b2);"
- " .if adj < \\dtime;"
- " .set dtime2, (\\dtime - adj);"
- " fl_delay dtime2, \\reg;"
- " .endif;"
- " .endm;"
-
- // check the bit and drop the line low if it isn't set (the bit tested is
- // the one shifted out of the top of b into the carry flag)
- " .macro qlo4 b,bitmask,port,loff ;"
- " lsl \\b, #1 ;"
- " bcs skip_\\@ ;"
- " str \\bitmask, [\\port, \\loff] ;"
- " skip_\\@: ;"
- " m0pad;"
- " .endm ;"
-
- // set the pin hi or low (determined by the offset passed in )
- " .macro qset2 bitmask,port,loff;"
- " str \\bitmask, [\\port, \\loff];"
- " m0pad;"
- " .endm;"
-
- // Load up the next led byte to work with, put it in bn (leds lives in a
- // high register, so it is copied to scratch before being used as a base)
- " .macro loadleds3 leds, bn, rled, scratch;"
- " mov \\scratch, \\leds;"
- " ldrb \\bn, [\\scratch, \\rled];"
- " .endm;"
-
- // check whether or not we should dither: load the dither byte, move both
- // it and bn into the top byte, and zero the dither when the led byte is 0
- " .macro loaddither7 bn,d,base,rdither;"
- " ldrb \\d, [\\base, \\rdither];"
- " lsl \\d, #24;" //; shift high for the qadd w/bn
- " lsl \\bn, #24;" //; shift high for the qadd w/d
- " bne chkskip_\\@;" //; if bn==0, clear d;"
- " eor \\d, \\d;" //; clear d;"
- " m0pad;"
- " chkskip_\\@:;"
- " .endm;"
-
- // Do the qadd8 for dithering -- there's two versions of this. The m0 version
- // takes advantage of the 3 cycle branch to do two things after the branch,
- // while keeping timing constant. The m0+, however, branches in 2 cycles, so
- // we have to work around that a bit more. This is one of the few times
- // where the m0 will actually be _more_ efficient than the m0+
- // On overflow (carry set) the result is saturated to 0xFF in the top byte.
- " .macro dither5 bn,d;"
- " .syntax unified;"
- " .if fl_is_m0p == 0;"
- " adds \\bn, \\d;" // do the add
- " bcc dither5_1_\\@;"
- " mvns \\bn, \\bn;" // set the low 24 bits to 1's
- " lsls \\bn, \\bn, #24;" // move low 8 bits to the high bits
- " dither5_1_\\@:;"
- " nop;" // nop to keep timing in line
- " .else;"
- " adds \\bn, \\d;" // do the add"
- " bcc dither5_2_\\@;"
- " mvns \\bn, \\bn;" // set the low 24 bits to 1's
- " dither5_2_\\@:;"
- " bcc dither5_3_\\@;"
- " lsls \\bn, \\bn, #24;" // move low 8 bits to the high bits
- " dither5_3_\\@:;"
- " .endif;"
- " .syntax divided;"
- " .endm;"
-
- // Do our scaling
- " .macro scale4 bn, base, scale, scratch;"
- " ldr \\scratch, [\\base, \\scale];"
- " lsr \\bn, \\bn, #24;" // bring bn back down to its low 8 bits
- " mul \\bn, \\scratch;" // do the multiply
- " .endm;"
-
- // swap bn into b
- " .macro swapbbn1 b,bn;"
- " lsl \\b, \\bn, #16;" // put the 8 bits we want for output high
- " .endm;"
-
- // adjust the dithering value for the next time around (load e from memory
- // to do the math)
- " .macro adjdither7 base,d,rled,eoffset,scratch;"
- " ldrb \\d, [\\base, \\rled];"
- " ldrb \\scratch,[\\base,\\eoffset];" // load e
- " .syntax unified;"
- " subs \\d, \\scratch, \\d;" // d=e-d
- " .syntax divided;"
- " strb \\d, [\\base, \\rled];" // save d
- " .endm;"
-
- // increment the led pointer (base+6 has what we're incrementing by)
- " .macro incleds3 leds, base, scratch;"
- " ldrb \\scratch, [\\base, #6];" // load increment
- " add \\leds, \\leds, \\scratch;" // update leds pointer
- " .endm;"
-
- // compare and loop: decrement the counter, fall through when it reaches
- // zero, otherwise branch back to label
- " .macro cmploop5 counter,label;"
- " .syntax unified;"
- " subs \\counter, #1;"
- " .syntax divided;"
- " beq done_\\@;"
- " m0pad;"
- " b \\label;"
- " done_\\@:;"
- " .endm;"
-
- " .endif;"
- );
-
- // Operand lists shared by every asm statement below. "l" operands are kept in
- // low registers and "h" in a high register; the "I" operands are compile-time
- // immediates (register offsets, M0ClocklessData member offsets and timings).
- // The asm changes the condition flags and reads/writes memory that is not
- // listed as an operand (the led buffer, *pData and the port registers), so
- // both "cc" and "memory" are declared as clobbered; "memory" also stops the
- // compiler from moving the pData->s updates above across the asm.
- #define M0_ASM_ARGS : \
- [leds] "+h" (leds), \
- [counter] "+l" (counter), \
- [scratch] "+l" (scratch), \
- [d] "+l" (d), \
- [bn] "+l" (bn), \
- [b] "+l" (b) \
- : \
- [port] "l" (port), \
- [base] "l" (base), \
- [bitmask] "l" (bitmask), \
- [hi_off] "I" (HI_OFFSET), \
- [lo_off] "I" (LO_OFFSET), \
- [led0] "I" (RO(0)), \
- [led1] "I" (RO(1)), \
- [led2] "I" (RO(2)), \
- [e0] "I" (3+RO(0)), \
- [e1] "I" (3+RO(1)), \
- [e2] "I" (3+RO(2)), \
- [scale0] "I" (4*(2+RO(0))), \
- [scale1] "I" (4*(2+RO(1))), \
- [scale2] "I" (4*(2+RO(2))), \
- [T1] "I" (T1), \
- [T2] "I" (T2), \
- [T3] "I" (T3) \
- : "cc", "memory"
-
- /////////////////////////////////////////////////////////////////////////
- // now for some convenience macros to make building our lines a bit cleaner
- //
- // Each row of the tables below emits one output bit:
- //   HI2      drive the line high
- //   _D1      wait out the rest of T1 (2 cycles already spent in HI2)
- //   QLO4     drop the line low now if the bit being sent is 0
- //   <work>   one step of preparing the next byte (or NOTHING)
- //   _D2(n)   wait out the rest of T2 (4 cycles in QLO4 plus n in <work>)
- //   LO2      drive the line low unconditionally
- //   _D3(n)   wait out the rest of T3 (2 cycles in LO2 plus n extra)
- // The digit at the end of each macro name matches the cycle count passed to
- // the delay macros for it (presumably its cost in cycles).
- #define LOOP " loop_%=:"
- #define HI2 " qset2 %[bitmask], %[port], %[hi_off];"
- #define _D1 " mod_delay %c[T1],2,0,%[scratch];"
- #define QLO4 " qlo4 %[b],%[bitmask],%[port], %[lo_off];"
- #define LOADLEDS3(X) " loadleds3 %[leds], %[bn], %[led" #X "] ,%[scratch];"
- #define _D2(ADJ) " mod_delay %c[T2],4," #ADJ ",%[scratch];"
- #define LO2 " qset2 %[bitmask], %[port], %[lo_off];"
- #define _D3(ADJ) " mod_delay %c[T3],2," #ADJ ",%[scratch];"
- #define LOADDITHER7(X) " loaddither7 %[bn], %[d], %[base], %[led" #X "];"
- #define DITHER5 " dither5 %[bn], %[d];"
- #define SCALE4(X) " scale4 %[bn], %[base], %[scale" #X "], %[scratch];"
- #define SWAPBBN1 " swapbbn1 %[b], %[bn];"
- #define ADJDITHER7(X) " adjdither7 %[base],%[d],%[led" #X "],%[e" #X "],%[scratch];"
- #define INCLEDS3 " incleds3 %[leds],%[base],%[scratch];"
- #define CMPLOOP5 " cmploop5 %[counter], loop_%=;"
- #define NOTHING ""
-
- #if (defined(SEI_CHK) && (FASTLED_ALLOW_INTERRUPTS == 1))
- // We're allowing interrupts and have hardware timer support defined -
- // track the loop outside the asm code, to allow inserting the interrupt
- // overrun checks.
- asm __volatile__ (
- // pre-load byte 0
- LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
- M0_ASM_ARGS);
-
- do {
- asm __volatile__ (
- // Write out byte 0, prepping byte 1
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(1) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(1) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(1) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(1) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0)
-
- // Write out byte 1, prepping byte 2
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(2) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(2) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(2) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(2) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0)
-
- // Write out byte 2, prepping byte 0
- // (this variant advances the led pointer in the first bit of byte 2; the
- // other two variants do it in the seventh bit of byte 1 - either way it
- // happens after byte 2 is loaded and before the next byte 0 is loaded)
- HI2 _D1 QLO4 INCLEDS3 _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(0) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(0) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(0) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(0) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(5)
-
- M0_ASM_ARGS
- );
- // SEI_CHK, INNER_SEI and CLI_CHK are macros defined outside this file.
- SEI_CHK; INNER_SEI; --counter; CLI_CHK;
- } while(counter);
- #elif (FASTLED_ALLOW_INTERRUPTS == 1)
- // We're allowing interrupts - track the loop outside the asm code, and
- // re-enable interrupts in between each iteration.
- asm __volatile__ (
- // pre-load byte 0
- LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
- M0_ASM_ARGS);
-
- do {
- asm __volatile__ (
- // Write out byte 0, prepping byte 1
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(1) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(1) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(1) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(1) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0)
-
- // Write out byte 1, prepping byte 2
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(2) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(2) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(2) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(2) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 INCLEDS3 _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0)
-
- // Write out byte 2, prepping byte 0
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(0) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(0) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(0) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(0) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(5)
-
- M0_ASM_ARGS
- );
-
- // Open a short window in which pending interrupts can run, and sample
- // SysTick on either side of it to measure how long they took.
- uint32_t ticksBeforeInterrupts = SysTick->VAL;
- sei();
- --counter;
- cli();
-
- // If more than 45 uSecs have elapsed, give up on this frame and start over.
- // Note: this isn't completely correct. It's possible that more than one
- // millisecond will elapse, and so SysTick->VAL will lap
- // ticksBeforeInterrupts.
- // Note: ticksBeforeInterrupts DECREASES
- const uint32_t kTicksPerMs = VARIANT_MCK / 1000;
- const uint32_t kTicksPerUs = kTicksPerMs / 1000;
- const uint32_t kTicksIn45us = kTicksPerUs * 45;
-
- const uint32_t currentTicks = SysTick->VAL;
-
- if (ticksBeforeInterrupts < currentTicks) {
- // Timer started over
- if ((ticksBeforeInterrupts + (kTicksPerMs - currentTicks)) > kTicksIn45us) {
- return 0;
- }
- } else {
- if ((ticksBeforeInterrupts - currentTicks) > kTicksIn45us) {
- return 0;
- }
- }
- } while(counter);
- #else
- // We're not allowing interrupts - run the entire loop in asm to keep things
- // as tight as possible. In an ideal world, we should be pushing out ws281x
- // leds (or other 3-wire leds) with zero gaps between pixels.
- asm __volatile__ (
- // pre-load byte 0
- LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-
- // loop over writing out the data
- LOOP
- // Write out byte 0, prepping byte 1
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(1) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(1) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(1) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(1) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0)
-
- // Write out byte 1, prepping byte 2
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(2) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(2) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(2) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(2) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 INCLEDS3 _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0)
-
- // Write out byte 2, prepping byte 0
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 LOADLEDS3(0) _D2(3) LO2 _D3(0)
- HI2 _D1 QLO4 LOADDITHER7(0) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0)
- HI2 _D1 QLO4 SCALE4(0) _D2(4) LO2 _D3(0)
- HI2 _D1 QLO4 ADJDITHER7(0) _D2(7) LO2 _D3(0)
- HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0)
- HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(5) CMPLOOP5
-
- M0_ASM_ARGS
- );
- #endif
- return num_leds;
- }
-
- #endif
|