PlatformIO package of the Teensy core framework compatible with GCC 10 & C++20
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

381 line
17KB

  1. #ifndef __INC_FASTSPI_BITBANG_H
  2. #define __INC_FASTSPI_BITBANG_H
  3. #include "FastLED.h"
  4. #include "fastled_delay.h"
  5. FASTLED_NAMESPACE_BEGIN
  6. //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  7. //
  8. // Software SPI (aka bit-banging) support - with aggressive optimizations for when the clock and data pin are on the same port
  9. //
  10. // TODO: Replace the select pin definition with a set of pins, to allow using mux hardware for routing in the future
  11. //
  12. //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  13. template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint32_t SPI_SPEED>
  14. class AVRSoftwareSPIOutput {
  15. // The data types for pointers to the pin port - typedef'd here from the Pin definition because on avr these
  16. // are pointers to 8 bit values, while on arm they are 32 bit
  17. typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
  18. typedef typename FastPin<CLOCK_PIN>::port_ptr_t clock_ptr_t;
  19. // The data type for what's at a pin's port - typedef'd here from the Pin definition because on avr the ports
  20. // are 8 bits wide while on arm they are 32.
  21. typedef typename FastPin<DATA_PIN>::port_t data_t;
  22. typedef typename FastPin<CLOCK_PIN>::port_t clock_t;
  23. Selectable *m_pSelect;
  24. public:
  25. AVRSoftwareSPIOutput() { m_pSelect = NULL; }
  26. AVRSoftwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
  27. void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
  28. void init() {
  29. // set the pins to output and make sure the select is released (which apparently means hi? This is a bit
  30. // confusing to me)
  31. FastPin<DATA_PIN>::setOutput();
  32. FastPin<CLOCK_PIN>::setOutput();
  33. release();
  34. }
  // Stop the SPI output. Intentionally empty: software SPI has no peripheral
  // registers that would need to be shut down.
  static void stop() {}
  // Wait until the SPI subsystem is ready to accept more data. Intentionally empty when bit-banging.
  __attribute__((always_inline)) inline static void wait() {}
  // Wait until all output is complete; simply defers to wait().
  __attribute__((always_inline)) inline static void waitFully() {
    wait();
  }
  40. static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); }
  41. static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); wait(); }
  42. static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); }
  43. // naive writeByte implelentation, simply calls writeBit on the 8 bits in the byte.
  44. static void writeByte(uint8_t b) {
  45. writeBit<7>(b);
  46. writeBit<6>(b);
  47. writeBit<5>(b);
  48. writeBit<4>(b);
  49. writeBit<3>(b);
  50. writeBit<2>(b);
  51. writeBit<1>(b);
  52. writeBit<0>(b);
  53. }
  54. private:
  55. // writeByte implementation with data/clock registers passed in.
  56. static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) {
  57. writeBit<7>(b, clockpin, datapin);
  58. writeBit<6>(b, clockpin, datapin);
  59. writeBit<5>(b, clockpin, datapin);
  60. writeBit<4>(b, clockpin, datapin);
  61. writeBit<3>(b, clockpin, datapin);
  62. writeBit<2>(b, clockpin, datapin);
  63. writeBit<1>(b, clockpin, datapin);
  64. writeBit<0>(b, clockpin, datapin);
  65. }
  66. // writeByte implementation with the data register passed in and prebaked values for data hi w/clock hi and
  67. // low and data lo w/clock hi and lo. This is to be used when clock and data are on the same GPIO register,
  68. // can get close to getting a bit out the door in 2 clock cycles!
  69. static void writeByte(uint8_t b, data_ptr_t datapin,
  70. data_t hival, data_t loval,
  71. clock_t hiclock, clock_t loclock) {
  72. writeBit<7>(b, datapin, hival, loval, hiclock, loclock);
  73. writeBit<6>(b, datapin, hival, loval, hiclock, loclock);
  74. writeBit<5>(b, datapin, hival, loval, hiclock, loclock);
  75. writeBit<4>(b, datapin, hival, loval, hiclock, loclock);
  76. writeBit<3>(b, datapin, hival, loval, hiclock, loclock);
  77. writeBit<2>(b, datapin, hival, loval, hiclock, loclock);
  78. writeBit<1>(b, datapin, hival, loval, hiclock, loclock);
  79. writeBit<0>(b, datapin, hival, loval, hiclock, loclock);
  80. }
  81. // writeByte implementation with not just registers passed in, but pre-baked values for said registers for
  82. // data hi/lo and clock hi/lo values. Note: weird things will happen if this method is called in cases where
  83. // the data and clock pins are on the same port! Don't do that!
  84. static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin,
  85. data_t hival, data_t loval,
  86. clock_t hiclock, clock_t loclock) {
  87. writeBit<7>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  88. writeBit<6>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  89. writeBit<5>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  90. writeBit<4>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  91. writeBit<3>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  92. writeBit<2>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  93. writeBit<1>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  94. writeBit<0>(b, clockpin, datapin, hival, loval, hiclock, loclock);
  95. }
  96. public:
  // We want to make sure that the clock pulse is held high for a minimum of 35ns.
  //
  // CLOCK_HI_DELAY / CLOCK_LO_DELAY are the pauses inserted after the rising and the falling clock edge.
  // Each expands to a single do { } while(0) statement WITHOUT a trailing semicolon, so a call site
  // writes `CLOCK_HI_DELAY;` and gets exactly one statement.
#if defined(FASTLED_TEENSY4)
  // DELAY_NS is the bit period in nanoseconds. NOTE(review): this treats SPI_SPEED as a clock rate in Hz,
  // as the previous 1000 / (SPI_SPEED/1000000) form did - confirm against the platform's DATA_RATE macros.
  // Dividing 1e9 by the rate directly avoids the division by zero the old form hit for rates below 1MHz
  // and its truncation of rates that are not a whole number of MHz. SPI_SPEED must still be non-zero.
#define DELAY_NS (1000000000UL / (SPI_SPEED))
#define CLOCK_HI_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0)
#define CLOCK_LO_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0)
#else
  // Here the delays are expressed through delaycycles<>. MIN_DELAY is the 35ns minimum (via NS()) less 3, floored at 1.
#define MIN_DELAY ((NS(35)>3) ? (NS(35) - 3) : 1)
#define CLOCK_HI_DELAY do { delaycycles<MIN_DELAY>(); delaycycles<((SPI_SPEED > 10) ? (((SPI_SPEED-6) / 2) - MIN_DELAY) : (SPI_SPEED))>(); } while(0)
#define CLOCK_LO_DELAY do { delaycycles<((SPI_SPEED > 10) ? ((SPI_SPEED-6) / 2) : (SPI_SPEED))>(); } while(0)
#endif
  107. // write the BIT'th bit out via spi, setting the data pin then strobing the clcok
  108. template <uint8_t BIT> __attribute__((always_inline, hot)) inline static void writeBit(uint8_t b) {
  109. //cli();
  110. if(b & (1 << BIT)) {
  111. FastPin<DATA_PIN>::hi();
  112. #ifdef ESP32
  113. // try to ensure we never have adjacent write opcodes to the same register
  114. FastPin<CLOCK_PIN>::lo();
  115. FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
  116. FastPin<CLOCK_PIN>::toggle(); CLOCK_LO_DELAY;
  117. #else
  118. FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
  119. FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
  120. #endif
  121. } else {
  122. FastPin<DATA_PIN>::lo();
  123. FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
  124. #ifdef ESP32
  125. // try to ensure we never have adjacent write opcodes to the same register
  126. FastPin<CLOCK_PIN>::toggle(); CLOCK_HI_DELAY;
  127. #else
  128. FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
  129. #endif
  130. }
  131. //sei();
  132. }
  133. private:
  134. // write the BIT'th bit out via spi, setting the data pin then strobing the clock, using the passed in pin registers to accelerate access if needed
  135. template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) {
  136. if(b & (1 << BIT)) {
  137. FastPin<DATA_PIN>::hi(datapin);
  138. FastPin<CLOCK_PIN>::hi(clockpin); CLOCK_HI_DELAY;
  139. FastPin<CLOCK_PIN>::lo(clockpin); CLOCK_LO_DELAY;
  140. } else {
  141. FastPin<DATA_PIN>::lo(datapin);
  142. FastPin<CLOCK_PIN>::hi(clockpin); CLOCK_HI_DELAY;
  143. FastPin<CLOCK_PIN>::lo(clockpin); CLOCK_LO_DELAY;
  144. }
  145. }
  146. // the version of write to use when clock and data are on separate pins with precomputed values for setting
  147. // the clock and data pins
  148. template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin,
  149. data_t hival, data_t loval, clock_t hiclock, clock_t loclock) {
  150. // // only need to explicitly set clock hi if clock and data are on different ports
  151. if(b & (1 << BIT)) {
  152. FastPin<DATA_PIN>::fastset(datapin, hival);
  153. FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); CLOCK_HI_DELAY;
  154. FastPin<CLOCK_PIN>::fastset(clockpin, loclock); CLOCK_LO_DELAY;
  155. } else {
  156. // FL_NOP;
  157. FastPin<DATA_PIN>::fastset(datapin, loval);
  158. FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); CLOCK_HI_DELAY;
  159. FastPin<CLOCK_PIN>::fastset(clockpin, loclock); CLOCK_LO_DELAY;
  160. }
  161. }
  162. // the version of write to use when clock and data are on the same port with precomputed values for the various
  163. // combinations
  164. template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, data_ptr_t clockdatapin,
  165. data_t datahiclockhi, data_t dataloclockhi,
  166. data_t datahiclocklo, data_t dataloclocklo) {
  167. #if 0
  168. writeBit<BIT>(b);
  169. #else
  170. if(b & (1 << BIT)) {
  171. FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo);
  172. FastPin<DATA_PIN>::fastset(clockdatapin, datahiclockhi); CLOCK_HI_DELAY;
  173. FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo); CLOCK_LO_DELAY;
  174. } else {
  175. // FL_NOP;
  176. FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo);
  177. FastPin<DATA_PIN>::fastset(clockdatapin, dataloclockhi); CLOCK_HI_DELAY;
  178. FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo); CLOCK_LO_DELAY;
  179. }
  180. #endif
  181. }
  182. public:
  183. // select the SPI output (TODO: research whether this really means hi or lo. Alt TODO: move select responsibility out of the SPI classes
  184. // entirely, make it up to the caller to remember to lock/select the line?)
  185. void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // FastPin<SELECT_PIN>::hi(); }
  186. // release the SPI line
  187. void release() { if(m_pSelect != NULL) { m_pSelect->release(); } } // FastPin<SELECT_PIN>::lo(); }
  188. // Write out len bytes of the given value out over SPI. Useful for quickly flushing, say, a line of 0's down the line.
  189. void writeBytesValue(uint8_t value, int len) {
  190. select();
  191. writeBytesValueRaw(value, len);
  192. release();
  193. }
  194. static void writeBytesValueRaw(uint8_t value, int len) {
  195. #ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
  196. // TODO: Weird things may happen if software bitbanging SPI output and other pins on the output reigsters are being twiddled. Need
  197. // to allow specifying whether or not exclusive i/o access is allowed during this process, and if i/o access is not allowed fall
  198. // back to the degenerative code below
  199. while(len--) {
  200. writeByte(value);
  201. }
  202. #else
  203. register data_ptr_t datapin = FastPin<DATA_PIN>::port();
  204. if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
  205. // If data and clock are on different ports, then writing a bit will consist of writing the value foor
  206. // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
  207. register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
  208. register data_t datahi = FastPin<DATA_PIN>::hival();
  209. register data_t datalo = FastPin<DATA_PIN>::loval();
  210. register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
  211. register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
  212. while(len--) {
  213. writeByte(value, clockpin, datapin, datahi, datalo, clockhi, clocklo);
  214. }
  215. } else {
  216. // If data and clock are on the same port then we can combine setting the data and clock pins
  217. register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
  218. register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
  219. register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
  220. register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
  221. while(len--) {
  222. writeByte(value, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
  223. }
  224. }
  225. #endif
  226. }
  227. // write a block of len uint8_ts out. Need to type this better so that explicit casts into the call aren't required.
  228. // note that this template version takes a class parameter for a per-byte modifier to the data.
  229. template <class D> void writeBytes(register uint8_t *data, int len) {
  230. select();
  231. #ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
  232. uint8_t *end = data + len;
  233. while(data != end) {
  234. writeByte(D::adjust(*data++));
  235. }
  236. #else
  237. register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
  238. register data_ptr_t datapin = FastPin<DATA_PIN>::port();
  239. if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
  240. // If data and clock are on different ports, then writing a bit will consist of writing the value foor
  241. // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
  242. register data_t datahi = FastPin<DATA_PIN>::hival();
  243. register data_t datalo = FastPin<DATA_PIN>::loval();
  244. register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
  245. register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
  246. uint8_t *end = data + len;
  247. while(data != end) {
  248. writeByte(D::adjust(*data++), clockpin, datapin, datahi, datalo, clockhi, clocklo);
  249. }
  250. } else {
  251. // FastPin<CLOCK_PIN>::hi();
  252. // If data and clock are on the same port then we can combine setting the data and clock pins
  253. register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
  254. register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
  255. register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
  256. register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
  257. uint8_t *end = data + len;
  258. while(data != end) {
  259. writeByte(D::adjust(*data++), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
  260. }
  261. // FastPin<CLOCK_PIN>::lo();
  262. }
  263. #endif
  264. D::postBlock(len);
  265. release();
  266. }
  267. // default version of writing a block of data out to the SPI port, with no data modifications being made
  268. void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
  269. // write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. The template
  270. // parameters indicate how many uint8_ts to skip at the beginning of each grouping, as well as a class specifying a per
  271. // byte of data modification to be made. (See DATA_NOP above)
  272. template <uint8_t FLAGS, class D, EOrder RGB_ORDER> __attribute__((noinline)) void writePixels(PixelController<RGB_ORDER> pixels) {
  273. select();
  274. int len = pixels.mLen;
  275. #ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
  276. // If interrupts or other things may be generating output while we're working on things, then we need
  277. // to use this block
  278. while(pixels.has(1)) {
  279. if(FLAGS & FLAG_START_BIT) {
  280. writeBit<0>(1);
  281. }
  282. writeByte(D::adjust(pixels.loadAndScale0()));
  283. writeByte(D::adjust(pixels.loadAndScale1()));
  284. writeByte(D::adjust(pixels.loadAndScale2()));
  285. pixels.advanceData();
  286. pixels.stepDithering();
  287. }
  288. #else
  289. // If we can guaruntee that no one else will be writing data while we are running (namely, changing the values of the PORT/PDOR pins)
  290. // then we can use a bunch of optimizations in here
  291. register data_ptr_t datapin = FastPin<DATA_PIN>::port();
  292. if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
  293. register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
  294. // If data and clock are on different ports, then writing a bit will consist of writing the value foor
  295. // the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
  296. register data_t datahi = FastPin<DATA_PIN>::hival();
  297. register data_t datalo = FastPin<DATA_PIN>::loval();
  298. register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
  299. register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
  300. while(pixels.has(1)) {
  301. if(FLAGS & FLAG_START_BIT) {
  302. writeBit<0>(1, clockpin, datapin, datahi, datalo, clockhi, clocklo);
  303. }
  304. writeByte(D::adjust(pixels.loadAndScale0()), clockpin, datapin, datahi, datalo, clockhi, clocklo);
  305. writeByte(D::adjust(pixels.loadAndScale1()), clockpin, datapin, datahi, datalo, clockhi, clocklo);
  306. writeByte(D::adjust(pixels.loadAndScale2()), clockpin, datapin, datahi, datalo, clockhi, clocklo);
  307. pixels.advanceData();
  308. pixels.stepDithering();
  309. }
  310. } else {
  311. // If data and clock are on the same port then we can combine setting the data and clock pins
  312. register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
  313. register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
  314. register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
  315. register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
  316. while(pixels.has(1)) {
  317. if(FLAGS & FLAG_START_BIT) {
  318. writeBit<0>(1, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
  319. }
  320. writeByte(D::adjust(pixels.loadAndScale0()), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
  321. writeByte(D::adjust(pixels.loadAndScale1()), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
  322. writeByte(D::adjust(pixels.loadAndScale2()), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
  323. pixels.advanceData();
  324. pixels.stepDithering();
  325. }
  326. }
  327. #endif
  328. D::postBlock(len);
  329. release();
  330. }
  331. };
  332. FASTLED_NAMESPACE_END
  333. #endif