PlatformIO package of the Teensy core framework compatible with GCC 10 & C++20
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

OctoWS2811_imxrt.cpp 13KB

3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /* OctoWS2811 - High Performance WS2811 LED Display Library
  2. http://www.pjrc.com/teensy/td_libs_OctoWS2811.html
  3. Copyright (c) 2020 Paul Stoffregen, PJRC.COM, LLC
  4. Permission is hereby granted, free of charge, to any person obtaining a copy
  5. of this software and associated documentation files (the "Software"), to deal
  6. in the Software without restriction, including without limitation the rights
  7. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. copies of the Software, and to permit persons to whom the Software is
  9. furnished to do so, subject to the following conditions:
  10. The above copyright notice and this permission notice shall be included in
  11. all copies or substantial portions of the Software.
  12. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  18. THE SOFTWARE.
  19. */
  20. #include <Arduino.h>
  21. #include "OctoWS2811.h"
  22. #if defined(__IMXRT1062__)
  23. #define TH_TL 1.25e-6
  24. #define T0H 0.30e-6
  25. #define T1H 0.75e-6
  26. // Ordinary RGB data is converted to GPIO bitmasks on-the-fly using
  27. // a transmit buffer sized for 2 DMA transfers. The larger this setting,
  28. // the more interrupt latency OctoWS2811 can tolerate, but the transmit
  29. // buffer grows in size. For good performance, the buffer should be kept
  30. // smaller than the half the Cortex-M7 data cache.
  31. #define BYTES_PER_DMA 40
  32. uint8_t OctoWS2811::defaultPinList[8] = {2, 14, 7, 8, 6, 20, 21, 5};
  33. uint16_t OctoWS2811::stripLen;
  34. void * OctoWS2811::frameBuffer;
  35. void * OctoWS2811::drawBuffer;
  36. uint8_t OctoWS2811::params;
  37. DMAChannel OctoWS2811::dma1;
  38. DMAChannel OctoWS2811::dma2;
  39. DMAChannel OctoWS2811::dma3;
  40. static DMASetting dma2next;
  41. static uint32_t numbytes;
  42. static uint8_t numpins;
  43. static uint8_t pinlist[NUM_DIGITAL_PINS]; // = {2, 14, 7, 8, 6, 20, 21, 5};
  44. static uint8_t pin_bitnum[NUM_DIGITAL_PINS];
  45. static uint8_t pin_offset[NUM_DIGITAL_PINS];
  46. static uint16_t comp1load[3];
  47. DMAMEM static uint32_t bitmask[4] __attribute__ ((used, aligned(32)));
  48. DMAMEM static uint32_t bitdata[BYTES_PER_DMA*64] __attribute__ ((used, aligned(32)));
  49. volatile uint32_t framebuffer_index = 0;
  50. volatile bool dma_first;
  51. static uint32_t update_begin_micros = 0;
  52. OctoWS2811::OctoWS2811(uint32_t numPerStrip, void *frameBuf, void *drawBuf, uint8_t config, uint8_t numPins, const uint8_t *pinList)
  53. {
  54. stripLen = numPerStrip;
  55. frameBuffer = frameBuf;
  56. drawBuffer = drawBuf;
  57. params = config;
  58. if (numPins > NUM_DIGITAL_PINS) numPins = NUM_DIGITAL_PINS;
  59. numpins = numPins;
  60. memcpy(pinlist, pinList, numpins);
  61. }
  62. void OctoWS2811::begin(uint32_t numPerStrip, void *frameBuf, void *drawBuf, uint8_t config, uint8_t numPins, const uint8_t *pinList)
  63. {
  64. stripLen = numPerStrip;
  65. frameBuffer = frameBuf;
  66. drawBuffer = drawBuf;
  67. params = config;
  68. if (numPins > NUM_DIGITAL_PINS) numPins = NUM_DIGITAL_PINS;
  69. numpins = numPins;
  70. memcpy(pinlist, pinList, numpins);
  71. begin();
  72. }
  73. int OctoWS2811::numPixels(void)
  74. {
  75. return stripLen * numpins;
  76. }
  77. extern "C" void xbar_connect(unsigned int input, unsigned int output); // in pwm.c
  // Returns the register address located 0x01E48000 bytes below `fastgpio`.
  // begin() applies it to the result of portModeRegister() to reach the
  // matching register of the standard GPIO block (presumably the fixed
  // spacing between the fast GPIO6..9 and standard GPIO1..4 register blocks
  // on the i.MX RT1062 -- confirm against the reference manual).
  //
  // The arithmetic is done in uintptr_t so the pointer value is never
  // truncated, whatever the pointer width of the build.
  static volatile uint32_t *standard_gpio_addr(volatile uint32_t *fastgpio) {
      const uintptr_t fastToStandardOffset = 0x01E48000;
      const uintptr_t fastAddress = reinterpret_cast<uintptr_t>(fastgpio);
      return reinterpret_cast<volatile uint32_t *>(fastAddress - fastToStandardOffset);
  }
  81. void OctoWS2811::begin(void)
  82. {
  83. numbytes = stripLen * 3; // TODO: 4 if RGBW
  84. // configure which pins to use
  85. memset(bitmask, 0, sizeof(bitmask));
  86. for (uint32_t i=0; i < numpins; i++) {
  87. uint8_t pin = pinlist[i];
  88. if (pin >= NUM_DIGITAL_PINS) continue; // ignore illegal pins
  89. uint8_t bit = digitalPinToBit(pin);
  90. uint8_t offset = ((uint32_t)portOutputRegister(pin) - (uint32_t)&GPIO6_DR) >> 14;
  91. if (offset > 3) continue; // ignore unknown pins
  92. pin_bitnum[i] = bit;
  93. pin_offset[i] = offset;
  94. uint32_t mask = 1 << bit;
  95. bitmask[offset] |= mask;
  96. *(&IOMUXC_GPR_GPR26 + offset) &= ~mask;
  97. *standard_gpio_addr(portModeRegister(pin)) |= mask;
  98. }
  99. arm_dcache_flush_delete(bitmask, sizeof(bitmask));
  100. // Set up 3 timers to create waveform timing events
  101. comp1load[0] = (uint16_t)((float)F_BUS_ACTUAL * (float)TH_TL);
  102. comp1load[1] = (uint16_t)((float)F_BUS_ACTUAL * (float)T0H);
  103. comp1load[2] = (uint16_t)((float)F_BUS_ACTUAL * (float)T1H);
  104. if ((params & 0xF0) == WS2811_400kHz) {
  105. comp1load[0] *= 2;
  106. comp1load[1] *= 2;
  107. comp1load[2] *= 2;
  108. }
  109. TMR4_ENBL &= ~7;
  110. TMR4_SCTRL0 = TMR_SCTRL_OEN | TMR_SCTRL_FORCE | TMR_SCTRL_MSTR;
  111. TMR4_CSCTRL0 = TMR_CSCTRL_CL1(1) | TMR_CSCTRL_TCF1EN;
  112. TMR4_CNTR0 = 0;
  113. TMR4_LOAD0 = 0;
  114. TMR4_COMP10 = comp1load[0];
  115. TMR4_CMPLD10 = comp1load[0];
  116. TMR4_CTRL0 = TMR_CTRL_CM(1) | TMR_CTRL_PCS(8) | TMR_CTRL_LENGTH | TMR_CTRL_OUTMODE(3);
  117. TMR4_SCTRL1 = TMR_SCTRL_OEN | TMR_SCTRL_FORCE;
  118. TMR4_CNTR1 = 0;
  119. TMR4_LOAD1 = 0;
  120. TMR4_COMP11 = comp1load[1]; // T0H
  121. TMR4_CMPLD11 = comp1load[1];
  122. TMR4_CTRL1 = TMR_CTRL_CM(1) | TMR_CTRL_PCS(8) | TMR_CTRL_COINIT | TMR_CTRL_OUTMODE(3);
  123. TMR4_SCTRL2 = TMR_SCTRL_OEN | TMR_SCTRL_FORCE;
  124. TMR4_CNTR2 = 0;
  125. TMR4_LOAD2 = 0;
  126. TMR4_COMP12 = comp1load[2]; // T1H
  127. TMR4_CMPLD12 = comp1load[2];
  128. TMR4_CTRL2 = TMR_CTRL_CM(1) | TMR_CTRL_PCS(8) | TMR_CTRL_COINIT | TMR_CTRL_OUTMODE(3);
  129. // route the timer outputs through XBAR to edge trigger DMA request
  130. CCM_CCGR2 |= CCM_CCGR2_XBAR1(CCM_CCGR_ON);
  131. xbar_connect(XBARA1_IN_QTIMER4_TIMER0, XBARA1_OUT_DMA_CH_MUX_REQ30);
  132. xbar_connect(XBARA1_IN_QTIMER4_TIMER1, XBARA1_OUT_DMA_CH_MUX_REQ31);
  133. xbar_connect(XBARA1_IN_QTIMER4_TIMER2, XBARA1_OUT_DMA_CH_MUX_REQ94);
  134. XBARA1_CTRL0 = XBARA_CTRL_STS1 | XBARA_CTRL_EDGE1(3) | XBARA_CTRL_DEN1 |
  135. XBARA_CTRL_STS0 | XBARA_CTRL_EDGE0(3) | XBARA_CTRL_DEN0;
  136. XBARA1_CTRL1 = XBARA_CTRL_STS0 | XBARA_CTRL_EDGE0(3) | XBARA_CTRL_DEN0;
  137. // configure DMA channels
  138. dma1.begin();
  139. dma1.TCD->SADDR = bitmask;
  140. dma1.TCD->SOFF = 8;
  141. dma1.TCD->ATTR = DMA_TCD_ATTR_SSIZE(3) | DMA_TCD_ATTR_SMOD(4) | DMA_TCD_ATTR_DSIZE(2);
  142. dma1.TCD->NBYTES_MLOFFYES = DMA_TCD_NBYTES_DMLOE |
  143. DMA_TCD_NBYTES_MLOFFYES_MLOFF(-65536) |
  144. DMA_TCD_NBYTES_MLOFFYES_NBYTES(16);
  145. dma1.TCD->SLAST = 0;
  146. dma1.TCD->DADDR = &GPIO1_DR_SET;
  147. dma1.TCD->DOFF = 16384;
  148. dma1.TCD->CITER_ELINKNO = numbytes * 8;
  149. dma1.TCD->DLASTSGA = -65536;
  150. dma1.TCD->BITER_ELINKNO = numbytes * 8;
  151. dma1.TCD->CSR = DMA_TCD_CSR_DREQ;
  152. dma1.triggerAtHardwareEvent(DMAMUX_SOURCE_XBAR1_0);
  153. dma2next.TCD->SADDR = bitdata;
  154. dma2next.TCD->SOFF = 8;
  155. dma2next.TCD->ATTR = DMA_TCD_ATTR_SSIZE(3) | DMA_TCD_ATTR_DSIZE(2);
  156. dma2next.TCD->NBYTES_MLOFFYES = DMA_TCD_NBYTES_DMLOE |
  157. DMA_TCD_NBYTES_MLOFFYES_MLOFF(-65536) |
  158. DMA_TCD_NBYTES_MLOFFYES_NBYTES(16);
  159. dma2next.TCD->SLAST = 0;
  160. dma2next.TCD->DADDR = &GPIO1_DR_CLEAR;
  161. dma2next.TCD->DOFF = 16384;
  162. dma2next.TCD->CITER_ELINKNO = BYTES_PER_DMA * 8;
  163. dma2next.TCD->DLASTSGA = (int32_t)(dma2next.TCD);
  164. dma2next.TCD->BITER_ELINKNO = BYTES_PER_DMA * 8;
  165. dma2next.TCD->CSR = 0;
  166. dma2.begin();
  167. dma2 = dma2next; // copies TCD
  168. dma2.triggerAtHardwareEvent(DMAMUX_SOURCE_XBAR1_1);
  169. dma2.attachInterrupt(isr);
  170. dma3.begin();
  171. dma3.TCD->SADDR = bitmask;
  172. dma3.TCD->SOFF = 8;
  173. dma3.TCD->ATTR = DMA_TCD_ATTR_SSIZE(3) | DMA_TCD_ATTR_SMOD(4) | DMA_TCD_ATTR_DSIZE(2);
  174. dma3.TCD->NBYTES_MLOFFYES = DMA_TCD_NBYTES_DMLOE |
  175. DMA_TCD_NBYTES_MLOFFYES_MLOFF(-65536) |
  176. DMA_TCD_NBYTES_MLOFFYES_NBYTES(16);
  177. dma3.TCD->SLAST = 0;
  178. dma3.TCD->DADDR = &GPIO1_DR_CLEAR;
  179. dma3.TCD->DOFF = 16384;
  180. dma3.TCD->CITER_ELINKNO = numbytes * 8;
  181. dma3.TCD->DLASTSGA = -65536;
  182. dma3.TCD->BITER_ELINKNO = numbytes * 8;
  183. dma3.TCD->CSR = DMA_TCD_CSR_DREQ | DMA_TCD_CSR_DONE;
  184. dma3.triggerAtHardwareEvent(DMAMUX_SOURCE_XBAR1_2);
  185. // set up the buffers
  186. uint32_t bufsize = numbytes * numpins;
  187. memset(frameBuffer, 0, bufsize);
  188. if (drawBuffer) {
  189. memset(drawBuffer, 0, bufsize);
  190. } else {
  191. drawBuffer = frameBuffer;
  192. }
  193. }
  // Expands pixel bytes into the DMA bit buffer for one output pin.
  //
  // Each pixel byte is walked from its most significant bit to its least
  // significant bit.  Every bit owns a group of 4 consecutive words in `dest`;
  // when the bit is 0, `mask` is OR-ed into the first word of that group, and
  // when it is 1 the word is left alone.  `dest` therefore advances by 32
  // words per byte.
  //
  //   dest    first word to update (caller adds the pin's port index)
  //   pixels  source bytes
  //   n       number of bytes; at least one byte is always processed
  //   mask    GPIO bit of the pin
  static void fillbits(uint32_t *dest, const uint8_t *pixels, int n, uint32_t mask)
  {
      do {
          const uint8_t value = *pixels++;
          for (uint8_t bit = 0x80; bit != 0; bit >>= 1) {
              if ((value & bit) == 0) {
                  *dest |= mask;
              }
              dest += 4;
          }
      } while (--n > 0);
  }
  216. void OctoWS2811::show(void)
  217. {
  218. // wait for any prior DMA operation
  219. while (!dma3.complete()) ; // wait
  220. // it's ok to copy the drawing buffer to the frame buffer
  221. // during the 50us WS2811 reset time
  222. if (drawBuffer != frameBuffer) {
  223. memcpy(frameBuffer, drawBuffer, numbytes * numpins);
  224. }
  225. // disable timers
  226. uint16_t enable = TMR4_ENBL;
  227. TMR4_ENBL = enable & ~7;
  228. // force all timer outputs to logic low
  229. TMR4_SCTRL0 = TMR_SCTRL_OEN | TMR_SCTRL_FORCE | TMR_SCTRL_MSTR;
  230. TMR4_SCTRL1 = TMR_SCTRL_OEN | TMR_SCTRL_FORCE;
  231. TMR4_SCTRL2 = TMR_SCTRL_OEN | TMR_SCTRL_FORCE;
  232. // clear any prior pending DMA requests
  233. XBARA1_CTRL0 |= XBARA_CTRL_STS1 | XBARA_CTRL_STS0;
  234. XBARA1_CTRL1 |= XBARA_CTRL_STS0;
  235. // fill the DMA transmit buffer
  236. //digitalWriteFast(12, HIGH);
  237. memset(bitdata, 0, sizeof(bitdata));
  238. uint32_t count = numbytes;
  239. if (count > BYTES_PER_DMA*2) count = BYTES_PER_DMA*2;
  240. framebuffer_index = count;
  241. for (uint32_t i=0; i < numpins; i++) {
  242. fillbits(bitdata + pin_offset[i], (uint8_t *)frameBuffer + i*numbytes,
  243. count, 1<<pin_bitnum[i]);
  244. }
  245. arm_dcache_flush_delete(bitdata, count * 128);
  246. //digitalWriteFast(12, LOW);
  247. // set up DMA transfers
  248. if (numbytes <= BYTES_PER_DMA*2) {
  249. dma2.TCD->SADDR = bitdata;
  250. dma2.TCD->DADDR = &GPIO1_DR_CLEAR;
  251. dma2.TCD->CITER_ELINKNO = count * 8;
  252. dma2.TCD->CSR = DMA_TCD_CSR_DREQ;
  253. } else {
  254. dma2.TCD->SADDR = bitdata;
  255. dma2.TCD->DADDR = &GPIO1_DR_CLEAR;
  256. dma2.TCD->CITER_ELINKNO = BYTES_PER_DMA * 8;
  257. dma2.TCD->CSR = 0;
  258. dma2.TCD->CSR = DMA_TCD_CSR_INTMAJOR | DMA_TCD_CSR_ESG;
  259. dma2next.TCD->SADDR = bitdata + BYTES_PER_DMA*32;
  260. dma2next.TCD->CITER_ELINKNO = BYTES_PER_DMA * 8;
  261. if (numbytes <= BYTES_PER_DMA*3) {
  262. dma2next.TCD->CSR = DMA_TCD_CSR_ESG;
  263. } else {
  264. dma2next.TCD->CSR = DMA_TCD_CSR_ESG | DMA_TCD_CSR_INTMAJOR;
  265. }
  266. dma_first = true;
  267. }
  268. dma3.clearComplete();
  269. dma1.enable();
  270. dma2.enable();
  271. dma3.enable();
  272. // initialize timers
  273. TMR4_CNTR0 = 0;
  274. TMR4_CNTR1 = comp1load[0] + 1;
  275. TMR4_CNTR2 = comp1load[0] + 1;
  276. // wait for WS2812 reset
  277. while (micros() - update_begin_micros < numbytes * 10 + 300) ;
  278. // start everything running!
  279. TMR4_ENBL = enable | 7;
  280. update_begin_micros = micros();
  281. }
  282. void OctoWS2811::isr(void)
  283. {
  284. // first ack the interrupt
  285. dma2.clearInterrupt();
  286. // fill (up to) half the transmit buffer with new data
  287. //digitalWriteFast(12, HIGH);
  288. uint32_t *dest;
  289. if (dma_first) {
  290. dma_first = false;
  291. dest = bitdata;
  292. } else {
  293. dma_first = true;
  294. dest = bitdata + BYTES_PER_DMA*32;
  295. }
  296. memset(dest, 0, sizeof(bitdata)/2);
  297. uint32_t index = framebuffer_index;
  298. uint32_t count = numbytes - framebuffer_index;
  299. if (count > BYTES_PER_DMA) count = BYTES_PER_DMA;
  300. framebuffer_index = index + count;
  301. for (int i=0; i < numpins; i++) {
  302. fillbits(dest + pin_offset[i], (uint8_t *)frameBuffer + index + i*numbytes,
  303. count, 1<<pin_bitnum[i]);
  304. }
  305. arm_dcache_flush_delete(dest, count * 128);
  306. //digitalWriteFast(12, LOW);
  307. // queue it for the next DMA transfer
  308. dma2next.TCD->SADDR = dest;
  309. dma2next.TCD->CITER_ELINKNO = count * 8;
  310. uint32_t remain = numbytes - (index + count);
  311. if (remain == 0) {
  312. dma2next.TCD->CSR = DMA_TCD_CSR_DREQ;
  313. } else if (remain <= BYTES_PER_DMA) {
  314. dma2next.TCD->CSR = DMA_TCD_CSR_ESG;
  315. } else {
  316. dma2next.TCD->CSR = DMA_TCD_CSR_ESG | DMA_TCD_CSR_INTMAJOR;
  317. }
  318. }
  319. int OctoWS2811::busy(void)
  320. {
  321. if (!dma3.complete()) ; // DMA still running
  322. if (micros() - update_begin_micros < numbytes * 10 + 300) return 1; // WS2812 reset
  323. return 0;
  324. }
  325. // For Teensy 4.x, the pixel data is stored in ordinary RGB format. Translation
  326. // from 24 bit color to GPIO bitmasks is done on-the-fly by fillbits(). This is
  327. // different from Teensy 3.x, where the data was stored as bytes to write directly
  328. // to the GPIO output register.
  329. void OctoWS2811::setPixel(uint32_t num, int color)
  330. {
  331. switch (params & 7) {
  332. case WS2811_RBG:
  333. color = (color&0xFF0000) | ((color<<8)&0x00FF00) | ((color>>8)&0x0000FF);
  334. break;
  335. case WS2811_GRB:
  336. color = ((color<<8)&0xFF0000) | ((color>>8)&0x00FF00) | (color&0x0000FF);
  337. break;
  338. case WS2811_GBR:
  339. color = ((color<<16)&0xFF0000) | ((color>>8)&0x00FFFF);
  340. break;
  341. case WS2811_BRG:
  342. color = ((color<<8)&0xFFFF00) | ((color>>16)&0x0000FF);
  343. break;
  344. case WS2811_BGR:
  345. color = ((color<<16)&0xFF0000) | (color&0x00FF00) | ((color>>16)&0x0000FF);
  346. break;
  347. default:
  348. break;
  349. }
  350. uint8_t *dest = (uint8_t *)drawBuffer + num * 3;
  351. *dest++ = color >> 16;
  352. *dest++ = color >> 8;
  353. *dest++ = color;
  354. }
  355. int OctoWS2811::getPixel(uint32_t num)
  356. {
  357. const uint8_t *p = (uint8_t *)drawBuffer + num * 3;
  358. int color = p[2] | (p[1] << 8) | (p[0] << 16);
  359. switch (params & 7) {
  360. case WS2811_RBG:
  361. color = (color&0xFF0000) | ((color<<8)&0x00FF00) | ((color>>8)&0x0000FF);
  362. break;
  363. case WS2811_GRB:
  364. color = ((color<<8)&0xFF0000) | ((color>>8)&0x00FF00) | (color&0x0000FF);
  365. break;
  366. case WS2811_GBR:
  367. color = ((color<<8)&0xFFFF00) | ((color>>16)&0x0000FF);
  368. break;
  369. case WS2811_BRG:
  370. color = ((color<<16)&0xFF0000) | ((color>>8)&0x00FFFF);
  371. break;
  372. case WS2811_BGR:
  373. color = ((color<<16)&0xFF0000) | (color&0x00FF00) | ((color>>16)&0x0000FF);
  374. break;
  375. default:
  376. break;
  377. }
  378. return color;
  379. }
  380. #endif // __IMXRT1062__