PlatformIO package of the Teensy core framework compatible with GCC 10 & C++20
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

277 lines
6.7KB

  1. #ifndef __INC_BITSWAP_H
  2. #define __INC_BITSWAP_H
  3. #include "FastLED.h"
  4. FASTLED_NAMESPACE_BEGIN
  5. ///@file bitswap.h
  6. ///Functions for rotating bits/bytes
  7. ///@defgroup Bitswap Bit swapping/rotate
  8. ///Functions for doing a rotation of bits/bytes used by parallel output
  9. ///@{
  10. #if defined(FASTLED_ARM) || defined(FASTLED_ESP8266)
  /// One byte that can be accessed either whole (`raw`) or as eight
  /// single-bit fields `a0` .. `a7`.
  ///
  /// The bit-fields are declared with a `uint32_t` base type; where each field
  /// lands inside `raw` is decided by the compiler's bit-field layout.
  typedef union {
      uint8_t raw;                              ///< the byte as a whole
      struct {
          uint32_t a0:1, a1:1, a2:1, a3:1;      ///< first four single-bit fields
          uint32_t a4:1, a5:1, a6:1, a7:1;      ///< last four single-bit fields
      };
  } just8bits;
  /// Thirty-two single-bit fields, grouped as four runs of eight:
  /// `a0..a7`, `b0..b7`, `c0..c7`, `d0..d7` (declared in that order).
  typedef struct {
      uint32_t a0:1, a1:1, a2:1, a3:1, a4:1, a5:1, a6:1, a7:1;   ///< group a
      uint32_t b0:1, b1:1, b2:1, b3:1, b4:1, b5:1, b6:1, b7:1;   ///< group b
      uint32_t c0:1, c1:1, c2:1, c3:1, c4:1, c5:1, c6:1, c7:1;   ///< group c
      uint32_t d0:1, d1:1, d2:1, d3:1, d4:1, d5:1, d6:1, d7:1;   ///< group d
  } sub4;
  60. /// union containing a full 8 bytes to swap the bit orientation on
  61. typedef union {
  62. uint32_t word[2];
  63. uint8_t bytes[8];
  64. struct {
  65. sub4 a;
  66. sub4 b;
  67. };
  68. } bitswap_type;
  // Bit-shuffling helper macros.
  //
  // All three expect variables named `in` and `out` to be in scope where they
  // are expanded. FIELD is a member path prefix inside `out` (e.g. `a.a`); the
  // digits 0..7 are pasted onto its last token. IDX is the digit pasted onto the
  // `a`/`b`/`c`/`d` field names read from `in`.

  /// Copy `in.a.{a,b,c,d}<IDX>` into `out.<FIELD>0` .. `out.<FIELD>3`.
  #define SWAPSA(FIELD,IDX) out. FIELD ## 0 = in.a.a ## IDX; \
                            out. FIELD ## 1 = in.a.b ## IDX; \
                            out. FIELD ## 2 = in.a.c ## IDX; \
                            out. FIELD ## 3 = in.a.d ## IDX;

  /// Copy `in.b.{a,b,c,d}<IDX>` into `out.<FIELD>0` .. `out.<FIELD>3`.
  #define SWAPSB(FIELD,IDX) out. FIELD ## 0 = in.b.a ## IDX; \
                            out. FIELD ## 1 = in.b.b ## IDX; \
                            out. FIELD ## 2 = in.b.c ## IDX; \
                            out. FIELD ## 3 = in.b.d ## IDX;

  /// Copy `in.a.{a,b,c,d}<IDX>` into `out.<FIELD>0..3` and
  /// `in.b.{a,b,c,d}<IDX>` into `out.<FIELD>4..7`.
  #define SWAPS(FIELD,IDX) out. FIELD ## 0 = in.a.a ## IDX; \
                           out. FIELD ## 1 = in.a.b ## IDX; \
                           out. FIELD ## 2 = in.a.c ## IDX; \
                           out. FIELD ## 3 = in.a.d ## IDX; \
                           out. FIELD ## 4 = in.b.a ## IDX; \
                           out. FIELD ## 5 = in.b.b ## IDX; \
                           out. FIELD ## 6 = in.b.c ## IDX; \
                           out. FIELD ## 7 = in.b.d ## IDX;
  85. /// Do an 8byte by 8bit rotation
  86. __attribute__((always_inline)) inline void swapbits8(bitswap_type in, bitswap_type & out) {
  87. // SWAPS(a.a,7);
  88. // SWAPS(a.b,6);
  89. // SWAPS(a.c,5);
  90. // SWAPS(a.d,4);
  91. // SWAPS(b.a,3);
  92. // SWAPS(b.b,2);
  93. // SWAPS(b.c,1);
  94. // SWAPS(b.d,0);
  95. // SWAPSA(a.a,7);
  96. // SWAPSA(a.b,6);
  97. // SWAPSA(a.c,5);
  98. // SWAPSA(a.d,4);
  99. //
  100. // SWAPSB(a.a,7);
  101. // SWAPSB(a.b,6);
  102. // SWAPSB(a.c,5);
  103. // SWAPSB(a.d,4);
  104. //
  105. // SWAPSA(b.a,3);
  106. // SWAPSA(b.b,2);
  107. // SWAPSA(b.c,1);
  108. // SWAPSA(b.d,0);
  109. // //
  110. // SWAPSB(b.a,3);
  111. // SWAPSB(b.b,2);
  112. // SWAPSB(b.c,1);
  113. // SWAPSB(b.d,0);
  114. for(int i = 0; i < 8; i++) {
  115. just8bits work;
  116. work.a3 = in.word[0] >> 31;
  117. work.a2 = in.word[0] >> 23;
  118. work.a1 = in.word[0] >> 15;
  119. work.a0 = in.word[0] >> 7;
  120. in.word[0] <<= 1;
  121. work.a7 = in.word[1] >> 31;
  122. work.a6 = in.word[1] >> 23;
  123. work.a5 = in.word[1] >> 15;
  124. work.a4 = in.word[1] >> 7;
  125. in.word[1] <<= 1;
  126. out.bytes[i] = work.raw;
  127. }
  128. }
  /// Slow (bit-at-a-time) version of the 8 byte by 8 bit rotation.
  ///
  /// For every input row `r` (0..7) and output index `k` (0..7), bit `r` of
  /// `B[k]` is set to bit `(7-k)` of `A[r]`. Every one of the 64 output bits is
  /// written (set or cleared), so the previous contents of `B` do not matter.
  ///
  /// @param A  eight input bytes (read only)
  /// @param B  eight output bytes (fully overwritten)
  __attribute__((always_inline)) inline void slowswap(unsigned char *A, unsigned char *B) {
      // All eight rows have to be visited: stopping after row 6 would ignore A[7]
      // and leave bit 7 of every output byte holding stale data.
      for(int row = 0; row < 8; row++) {
          const uint8_t x = A[row];
          const uint8_t bit = (uint8_t)(1u << row);
          unsigned char *p = B;

          // Walk the input byte from its MSB down to its LSB while stepping
          // through B[0]..B[7]; the loop ends when the mask shifts out to zero.
          for(uint32_t mask = 1u << 7; mask; mask >>= 1) {
              if(x & mask) {
                  *p++ |= bit;
              } else {
                  *p++ &= (unsigned char)~bit;
              }
          }
      }
  }
  /// Declaration only; no definition appears in this part of the header.
  /// (Presumably a non-inlined counterpart of transpose8x1 - confirm against its definition.)
  void transpose8x1_noinline(unsigned char *A, unsigned char *B);

  /// Simplified form of the bit-rotating function, based on the Hacker's Delight
  /// transpose8 code (http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt).
  /// The data is rotated into LSB for a faster write; code consuming the result
  /// can walk the array backwards.
  ///
  /// @param A  eight input bytes, read as two 32-bit words
  /// @param B  eight output bytes, written as two 32-bit words
  ///
  /// Both buffers are accessed through 32-bit pointers, so they must be
  /// readable/writable that way on the target (alignment included).
  __attribute__((always_inline)) inline void transpose8x1(unsigned char *A, unsigned char *B) {
      // Pull the eight source bytes in as two words: first four, then last four.
      uint32_t first4 = *(unsigned int*)(A);
      uint32_t last4  = *(unsigned int*)(A+4);
      uint32_t delta;

      // Pre-transform the word holding the last four bytes.
      delta = (last4 ^ (last4 >> 7)) & 0x00AA00AA;
      last4 ^= delta ^ (delta << 7);
      delta = (last4 ^ (last4 >> 14)) & 0x0000CCCC;
      last4 ^= delta ^ (delta << 14);

      // Pre-transform the word holding the first four bytes.
      delta = (first4 ^ (first4 >> 7)) & 0x00AA00AA;
      first4 ^= delta ^ (delta << 7);
      delta = (first4 ^ (first4 >> 14)) & 0x0000CCCC;
      first4 ^= delta ^ (delta << 14);

      // Final transform: exchange nibbles between the two words.
      const uint32_t upper_out = (last4 & 0xF0F0F0F0) | ((first4 >> 4) & 0x0F0F0F0F);
      const uint32_t lower_out = ((last4 << 4) & 0xF0F0F0F0) | (first4 & 0x0F0F0F0F);

      *((uint32_t*)B)     = lower_out;
      *((uint32_t*)(B+4)) = upper_out;
  }
  /// Simplified form of the bit-rotating function, based on the Hacker's Delight
  /// transpose8 code (http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt).
  ///
  /// Performs the same word-level transform as transpose8x1 but stores the
  /// result one byte at a time, most significant byte of each result word first.
  ///
  /// @param A  eight input bytes, read as two 32-bit words (must be readable
  ///           through a 32-bit pointer on the target)
  /// @param B  eight output bytes, written individually
  __attribute__((always_inline)) inline void transpose8x1_MSB(unsigned char *A, unsigned char *B) {
      // Pull the eight source bytes in as two words: first four, then last four.
      uint32_t first4 = *(unsigned int*)(A);
      uint32_t last4  = *(unsigned int*)(A+4);
      uint32_t delta;

      // Pre-transform the word holding the last four bytes.
      delta = (last4 ^ (last4 >> 7)) & 0x00AA00AA;
      last4 ^= delta ^ (delta << 7);
      delta = (last4 ^ (last4 >> 14)) & 0x0000CCCC;
      last4 ^= delta ^ (delta << 14);

      // Pre-transform the word holding the first four bytes.
      delta = (first4 ^ (first4 >> 7)) & 0x00AA00AA;
      first4 ^= delta ^ (delta << 7);
      delta = (first4 ^ (first4 >> 14)) & 0x0000CCCC;
      first4 ^= delta ^ (delta << 14);

      // Final transform: exchange nibbles between the two words.
      const uint32_t head = (last4 & 0xF0F0F0F0) | ((first4 >> 4) & 0x0F0F0F0F);
      const uint32_t tail = ((last4 << 4) & 0xF0F0F0F0) | (first4 & 0x0F0F0F0F);

      // B[0..3] take `head` from its top byte down, B[4..7] take `tail` likewise.
      B[0] = (unsigned char)(head >> 24);
      B[1] = (unsigned char)(head >> 16);
      B[2] = (unsigned char)(head >> 8);
      B[3] = (unsigned char)(head);
      B[4] = (unsigned char)(tail >> 24);
      B[5] = (unsigned char)(tail >> 16);
      B[6] = (unsigned char)(tail >> 8);
      B[7] = (unsigned char)(tail);
  }
  /// Templated bit-rotating function, based on the Hacker's Delight transpose8
  /// code (http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt).
  ///
  /// @tparam m  stride, in bytes, between consecutive input rows in `A`
  /// @tparam n  stride, in bytes, between consecutive output rows in `B`
  /// @param A   input; rows are read from A[0], A[m], ... A[7*m]
  /// @param B   output; rows are written to B[0], B[n], ... B[7*n]
  ///
  /// When `m == 1` the eight input bytes are loaded as two 32-bit words, so `A`
  /// must be readable through a 32-bit pointer on the target.
  /// NOTE(review): the word-load path and the byte-by-byte path place A[0] at
  /// different ends of the packed words, so on a little-endian target they do not
  /// pack the rows in the same order - confirm this is intended.
  template<int m, int n>
  __attribute__((always_inline)) inline void transpose8(unsigned char *A, unsigned char *B) {
      uint32_t x, y, t;

      // Load the array and pack it into x and y.
      if(m == 1) {
          y = *(unsigned int*)(A);
          x = *(unsigned int*)(A+4);
      } else {
          // Widen each byte to uint32_t before shifting: shifting the promoted
          // (signed) int left by 24 would move a set top bit into the sign bit.
          x = ((uint32_t)A[0]   << 24) | ((uint32_t)A[m]   << 16) | ((uint32_t)A[2*m] << 8) | (uint32_t)A[3*m];
          y = ((uint32_t)A[4*m] << 24) | ((uint32_t)A[5*m] << 16) | ((uint32_t)A[6*m] << 8) | (uint32_t)A[7*m];
      }

      // pre-transform x
      t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
      t = (x ^ (x >> 14)) & 0x0000CCCC; x = x ^ t ^ (t << 14);

      // pre-transform y
      t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
      t = (y ^ (y >> 14)) & 0x0000CCCC; y = y ^ t ^ (t << 14);

      // final transform
      t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
      y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
      x = t;

      // Store the result a byte at a time, lowest byte of y into the last row.
      B[7*n] = (unsigned char)y; y >>= 8;
      B[6*n] = (unsigned char)y; y >>= 8;
      B[5*n] = (unsigned char)y; y >>= 8;
      B[4*n] = (unsigned char)y;

      B[3*n] = (unsigned char)x; x >>= 8;
      B[2*n] = (unsigned char)x; x >>= 8;
      B[n]   = (unsigned char)x; x >>= 8;
      B[0]   = (unsigned char)x;
  }
  231. #endif
  232. FASTLED_NAMESPACE_END
  233. ///@}
  234. #endif