PlatformIO package of the Teensy core framework compatible with GCC 10 & C++20
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

713 líneas
23KB

  1. #ifndef __INC_LIB8TION_SCALE_H
  2. #define __INC_LIB8TION_SCALE_H
  3. ///@ingroup lib8tion
  4. ///@defgroup Scaling Scaling functions
  5. /// Fast, efficient 8-bit scaling functions specifically
  6. /// designed for high-performance LED programming.
  7. ///
  8. /// Because of the AVR(Arduino) and ARM assembly language
  9. /// implementations provided, using these functions often
  10. /// results in smaller and faster code than the equivalent
  11. /// program using plain "C" arithmetic and logic.
  12. ///@{
  13. /// scale one byte by a second one, which is treated as
  14. /// the numerator of a fraction whose denominator is 256
  15. /// In other words, it computes i * (scale / 256)
  16. /// 4 clocks AVR with MUL, 2 clocks ARM
  17. LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
  18. {
  19. #if SCALE8_C == 1
  20. #if (FASTLED_SCALE8_FIXED == 1)
  21. return (((uint16_t)i) * (1+(uint16_t)(scale))) >> 8;
  22. #else
  23. return ((uint16_t)i * (uint16_t)(scale) ) >> 8;
  24. #endif
  25. #elif SCALE8_AVRASM == 1
  26. #if defined(LIB8_ATTINY)
  27. #if (FASTLED_SCALE8_FIXED == 1)
  28. uint8_t work=i;
  29. #else
  30. uint8_t work=0;
  31. #endif
  32. uint8_t cnt=0x80;
  33. asm volatile(
  34. #if (FASTLED_SCALE8_FIXED == 1)
  35. " inc %[scale] \n\t"
  36. " breq DONE_%= \n\t"
  37. " clr %[work] \n\t"
  38. #endif
  39. "LOOP_%=: \n\t"
  40. /*" sbrc %[scale], 0 \n\t"
  41. " add %[work], %[i] \n\t"
  42. " ror %[work] \n\t"
  43. " lsr %[scale] \n\t"
  44. " clc \n\t"*/
  45. " sbrc %[scale], 0 \n\t"
  46. " add %[work], %[i] \n\t"
  47. " ror %[work] \n\t"
  48. " lsr %[scale] \n\t"
  49. " lsr %[cnt] \n\t"
  50. "brcc LOOP_%= \n\t"
  51. "DONE_%=: \n\t"
  52. : [work] "+r" (work), [cnt] "+r" (cnt)
  53. : [scale] "r" (scale), [i] "r" (i)
  54. :
  55. );
  56. return work;
  57. #else
  58. asm volatile(
  59. #if (FASTLED_SCALE8_FIXED==1)
  60. // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
  61. "mul %0, %1 \n\t"
  62. // Add i to r0, possibly setting the carry flag
  63. "add r0, %0 \n\t"
  64. // load the immediate 0 into i (note, this does _not_ touch any flags)
  65. "ldi %0, 0x00 \n\t"
  66. // walk and chew gum at the same time
  67. "adc %0, r1 \n\t"
  68. #else
  69. /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
  70. "mul %0, %1 \n\t"
  71. /* Move the high 8-bits of the product (r1) back to i */
  72. "mov %0, r1 \n\t"
  73. /* Restore r1 to "0"; it's expected to always be that */
  74. #endif
  75. "clr __zero_reg__ \n\t"
  76. : "+a" (i) /* writes to i */
  77. : "a" (scale) /* uses scale */
  78. : "r0", "r1" /* clobbers r0, r1 */ );
  79. /* Return the result */
  80. return i;
  81. #endif
  82. #else
  83. #error "No implementation for scale8 available."
  84. #endif
  85. }
  86. /// The "video" version of scale8 guarantees that the output will
  87. /// be only be zero if one or both of the inputs are zero. If both
  88. /// inputs are non-zero, the output is guaranteed to be non-zero.
  89. /// This makes for better 'video'/LED dimming, at the cost of
  90. /// several additional cycles.
  91. LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
  92. {
  93. #if SCALE8_C == 1 || defined(LIB8_ATTINY)
  94. uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
  95. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  96. // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
  97. return j;
  98. #elif SCALE8_AVRASM == 1
  99. uint8_t j=0;
  100. asm volatile(
  101. " tst %[i]\n\t"
  102. " breq L_%=\n\t"
  103. " mul %[i], %[scale]\n\t"
  104. " mov %[j], r1\n\t"
  105. " clr __zero_reg__\n\t"
  106. " cpse %[scale], r1\n\t"
  107. " subi %[j], 0xFF\n\t"
  108. "L_%=: \n\t"
  109. : [j] "+a" (j)
  110. : [i] "a" (i), [scale] "a" (scale)
  111. : "r0", "r1");
  112. return j;
  113. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  114. // asm volatile(
  115. // " tst %0 \n"
  116. // " breq L_%= \n"
  117. // " mul %0, %1 \n"
  118. // " mov %0, r1 \n"
  119. // " add %0, %2 \n"
  120. // " clr __zero_reg__ \n"
  121. // "L_%=: \n"
  122. // : "+a" (i)
  123. // : "a" (scale), "a" (nonzeroscale)
  124. // : "r0", "r1");
  125. // // Return the result
  126. // return i;
  127. #else
  128. #error "No implementation for scale8_video available."
  129. #endif
  130. }
  131. /// This version of scale8 does not clean up the R1 register on AVR
  132. /// If you are doing several 'scale8's in a row, use this, and
  133. /// then explicitly call cleanup_R1.
  134. LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
  135. {
  136. #if SCALE8_C == 1
  137. #if (FASTLED_SCALE8_FIXED == 1)
  138. return (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
  139. #else
  140. return ((int)i * (int)(scale) ) >> 8;
  141. #endif
  142. #elif SCALE8_AVRASM == 1
  143. asm volatile(
  144. #if (FASTLED_SCALE8_FIXED==1)
  145. // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
  146. "mul %0, %1 \n\t"
  147. // Add i to r0, possibly setting the carry flag
  148. "add r0, %0 \n\t"
  149. // load the immediate 0 into i (note, this does _not_ touch any flags)
  150. "ldi %0, 0x00 \n\t"
  151. // walk and chew gum at the same time
  152. "adc %0, r1 \n\t"
  153. #else
  154. /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
  155. "mul %0, %1 \n\t"
  156. /* Move the high 8-bits of the product (r1) back to i */
  157. "mov %0, r1 \n\t"
  158. #endif
  159. /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
  160. /* "clr __zero_reg__ \n\t" */
  161. : "+a" (i) /* writes to i */
  162. : "a" (scale) /* uses scale */
  163. : "r0", "r1" /* clobbers r0, r1 */ );
  164. // Return the result
  165. return i;
  166. #else
  167. #error "No implementation for scale8_LEAVING_R1_DIRTY available."
  168. #endif
  169. }
  170. /// In place modifying version of scale8, also this version of nscale8 does not
  171. /// clean up the R1 register on AVR
  172. /// If you are doing several 'scale8's in a row, use this, and
  173. /// then explicitly call cleanup_R1.
  174. LIB8STATIC_ALWAYS_INLINE void nscale8_LEAVING_R1_DIRTY( uint8_t& i, fract8 scale)
  175. {
  176. #if SCALE8_C == 1
  177. #if (FASTLED_SCALE8_FIXED == 1)
  178. i = (((uint16_t)i) * ((uint16_t)(scale)+1)) >> 8;
  179. #else
  180. i = ((int)i * (int)(scale) ) >> 8;
  181. #endif
  182. #elif SCALE8_AVRASM == 1
  183. asm volatile(
  184. #if (FASTLED_SCALE8_FIXED==1)
  185. // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
  186. "mul %0, %1 \n\t"
  187. // Add i to r0, possibly setting the carry flag
  188. "add r0, %0 \n\t"
  189. // load the immediate 0 into i (note, this does _not_ touch any flags)
  190. "ldi %0, 0x00 \n\t"
  191. // walk and chew gum at the same time
  192. "adc %0, r1 \n\t"
  193. #else
  194. /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
  195. "mul %0, %1 \n\t"
  196. /* Move the high 8-bits of the product (r1) back to i */
  197. "mov %0, r1 \n\t"
  198. #endif
  199. /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
  200. /* "clr __zero_reg__ \n\t" */
  201. : "+a" (i) /* writes to i */
  202. : "a" (scale) /* uses scale */
  203. : "r0", "r1" /* clobbers r0, r1 */ );
  204. #else
  205. #error "No implementation for nscale8_LEAVING_R1_DIRTY available."
  206. #endif
  207. }
  208. /// This version of scale8_video does not clean up the R1 register on AVR
  209. /// If you are doing several 'scale8_video's in a row, use this, and
  210. /// then explicitly call cleanup_R1.
  211. LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract8 scale)
  212. {
  213. #if SCALE8_C == 1 || defined(LIB8_ATTINY)
  214. uint8_t j = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
  215. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  216. // uint8_t j = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
  217. return j;
  218. #elif SCALE8_AVRASM == 1
  219. uint8_t j=0;
  220. asm volatile(
  221. " tst %[i]\n\t"
  222. " breq L_%=\n\t"
  223. " mul %[i], %[scale]\n\t"
  224. " mov %[j], r1\n\t"
  225. " breq L_%=\n\t"
  226. " subi %[j], 0xFF\n\t"
  227. "L_%=: \n\t"
  228. : [j] "+a" (j)
  229. : [i] "a" (i), [scale] "a" (scale)
  230. : "r0", "r1");
  231. return j;
  232. // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  233. // asm volatile(
  234. // " tst %0 \n"
  235. // " breq L_%= \n"
  236. // " mul %0, %1 \n"
  237. // " mov %0, r1 \n"
  238. // " add %0, %2 \n"
  239. // " clr __zero_reg__ \n"
  240. // "L_%=: \n"
  241. // : "+a" (i)
  242. // : "a" (scale), "a" (nonzeroscale)
  243. // : "r0", "r1");
  244. // // Return the result
  245. // return i;
  246. #else
  247. #error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
  248. #endif
  249. }
  250. /// In place modifying version of scale8_video, also this version of nscale8_video
  251. /// does not clean up the R1 register on AVR
  252. /// If you are doing several 'scale8_video's in a row, use this, and
  253. /// then explicitly call cleanup_R1.
  254. LIB8STATIC_ALWAYS_INLINE void nscale8_video_LEAVING_R1_DIRTY( uint8_t & i, fract8 scale)
  255. {
  256. #if SCALE8_C == 1 || defined(LIB8_ATTINY)
  257. i = (((int)i * (int)scale) >> 8) + ((i&&scale)?1:0);
  258. #elif SCALE8_AVRASM == 1
  259. asm volatile(
  260. " tst %[i]\n\t"
  261. " breq L_%=\n\t"
  262. " mul %[i], %[scale]\n\t"
  263. " mov %[i], r1\n\t"
  264. " breq L_%=\n\t"
  265. " subi %[i], 0xFF\n\t"
  266. "L_%=: \n\t"
  267. : [i] "+a" (i)
  268. : [scale] "a" (scale)
  269. : "r0", "r1");
  270. #else
  271. #error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
  272. #endif
  273. }
  274. /// Clean up the r1 register after a series of *LEAVING_R1_DIRTY calls
  275. LIB8STATIC_ALWAYS_INLINE void cleanup_R1()
  276. {
  277. #if CLEANUP_R1_AVRASM == 1
  278. // Restore r1 to "0"; it's expected to always be that
  279. asm volatile( "clr __zero_reg__ \n\t" : : : "r1" );
  280. #endif
  281. }
  282. /// scale three one byte values by a fourth one, which is treated as
  283. /// the numerator of a fraction whose demominator is 256
  284. /// In other words, it computes r,g,b * (scale / 256)
  285. ///
  286. /// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
  287. LIB8STATIC void nscale8x3( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale)
  288. {
  289. #if SCALE8_C == 1
  290. #if (FASTLED_SCALE8_FIXED == 1)
  291. uint16_t scale_fixed = scale + 1;
  292. r = (((uint16_t)r) * scale_fixed) >> 8;
  293. g = (((uint16_t)g) * scale_fixed) >> 8;
  294. b = (((uint16_t)b) * scale_fixed) >> 8;
  295. #else
  296. r = ((int)r * (int)(scale) ) >> 8;
  297. g = ((int)g * (int)(scale) ) >> 8;
  298. b = ((int)b * (int)(scale) ) >> 8;
  299. #endif
  300. #elif SCALE8_AVRASM == 1
  301. r = scale8_LEAVING_R1_DIRTY(r, scale);
  302. g = scale8_LEAVING_R1_DIRTY(g, scale);
  303. b = scale8_LEAVING_R1_DIRTY(b, scale);
  304. cleanup_R1();
  305. #else
  306. #error "No implementation for nscale8x3 available."
  307. #endif
  308. }
  309. /// scale three one byte values by a fourth one, which is treated as
  310. /// the numerator of a fraction whose demominator is 256
  311. /// In other words, it computes r,g,b * (scale / 256), ensuring
  312. /// that non-zero values passed in remain non zero, no matter how low the scale
  313. /// argument.
  314. ///
  315. /// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
  316. LIB8STATIC void nscale8x3_video( uint8_t& r, uint8_t& g, uint8_t& b, fract8 scale)
  317. {
  318. #if SCALE8_C == 1
  319. uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  320. r = (r == 0) ? 0 : (((int)r * (int)(scale) ) >> 8) + nonzeroscale;
  321. g = (g == 0) ? 0 : (((int)g * (int)(scale) ) >> 8) + nonzeroscale;
  322. b = (b == 0) ? 0 : (((int)b * (int)(scale) ) >> 8) + nonzeroscale;
  323. #elif SCALE8_AVRASM == 1
  324. nscale8_video_LEAVING_R1_DIRTY( r, scale);
  325. nscale8_video_LEAVING_R1_DIRTY( g, scale);
  326. nscale8_video_LEAVING_R1_DIRTY( b, scale);
  327. cleanup_R1();
  328. #else
  329. #error "No implementation for nscale8x3 available."
  330. #endif
  331. }
  332. /// scale two one byte values by a third one, which is treated as
  333. /// the numerator of a fraction whose demominator is 256
  334. /// In other words, it computes i,j * (scale / 256)
  335. ///
  336. /// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
  337. LIB8STATIC void nscale8x2( uint8_t& i, uint8_t& j, fract8 scale)
  338. {
  339. #if SCALE8_C == 1
  340. #if FASTLED_SCALE8_FIXED == 1
  341. uint16_t scale_fixed = scale + 1;
  342. i = (((uint16_t)i) * scale_fixed ) >> 8;
  343. j = (((uint16_t)j) * scale_fixed ) >> 8;
  344. #else
  345. i = ((uint16_t)i * (uint16_t)(scale) ) >> 8;
  346. j = ((uint16_t)j * (uint16_t)(scale) ) >> 8;
  347. #endif
  348. #elif SCALE8_AVRASM == 1
  349. i = scale8_LEAVING_R1_DIRTY(i, scale);
  350. j = scale8_LEAVING_R1_DIRTY(j, scale);
  351. cleanup_R1();
  352. #else
  353. #error "No implementation for nscale8x2 available."
  354. #endif
  355. }
  356. /// scale two one byte values by a third one, which is treated as
  357. /// the numerator of a fraction whose demominator is 256
  358. /// In other words, it computes i,j * (scale / 256), ensuring
  359. /// that non-zero values passed in remain non zero, no matter how low the scale
  360. /// argument.
  361. ///
  362. /// THIS FUNCTION ALWAYS MODIFIES ITS ARGUMENTS IN PLACE
  363. LIB8STATIC void nscale8x2_video( uint8_t& i, uint8_t& j, fract8 scale)
  364. {
  365. #if SCALE8_C == 1
  366. uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
  367. i = (i == 0) ? 0 : (((int)i * (int)(scale) ) >> 8) + nonzeroscale;
  368. j = (j == 0) ? 0 : (((int)j * (int)(scale) ) >> 8) + nonzeroscale;
  369. #elif SCALE8_AVRASM == 1
  370. nscale8_video_LEAVING_R1_DIRTY( i, scale);
  371. nscale8_video_LEAVING_R1_DIRTY( j, scale);
  372. cleanup_R1();
  373. #else
  374. #error "No implementation for nscale8x2 available."
  375. #endif
  376. }
  377. /// scale a 16-bit unsigned value by an 8-bit value,
  378. /// considered as numerator of a fraction whose denominator
  379. /// is 256. In other words, it computes i * (scale / 256)
  380. LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
  381. {
  382. #if SCALE16BY8_C == 1
  383. uint16_t result;
  384. #if FASTLED_SCALE8_FIXED == 1
  385. result = (i * (1+((uint16_t)scale))) >> 8;
  386. #else
  387. result = (i * scale) / 256;
  388. #endif
  389. return result;
  390. #elif SCALE16BY8_AVRASM == 1
  391. #if FASTLED_SCALE8_FIXED == 1
  392. uint16_t result = 0;
  393. asm volatile(
  394. // result.A = HighByte( (i.A x scale) + i.A )
  395. " mul %A[i], %[scale] \n\t"
  396. " add r0, %A[i] \n\t"
  397. // " adc r1, [zero] \n\t"
  398. // " mov %A[result], r1 \n\t"
  399. " adc %A[result], r1 \n\t"
  400. // result.A-B += i.B x scale
  401. " mul %B[i], %[scale] \n\t"
  402. " add %A[result], r0 \n\t"
  403. " adc %B[result], r1 \n\t"
  404. // cleanup r1
  405. " clr __zero_reg__ \n\t"
  406. // result.A-B += i.B
  407. " add %A[result], %B[i] \n\t"
  408. " adc %B[result], __zero_reg__ \n\t"
  409. : [result] "+r" (result)
  410. : [i] "r" (i), [scale] "r" (scale)
  411. : "r0", "r1"
  412. );
  413. return result;
  414. #else
  415. uint16_t result = 0;
  416. asm volatile(
  417. // result.A = HighByte(i.A x j )
  418. " mul %A[i], %[scale] \n\t"
  419. " mov %A[result], r1 \n\t"
  420. //" clr %B[result] \n\t"
  421. // result.A-B += i.B x j
  422. " mul %B[i], %[scale] \n\t"
  423. " add %A[result], r0 \n\t"
  424. " adc %B[result], r1 \n\t"
  425. // cleanup r1
  426. " clr __zero_reg__ \n\t"
  427. : [result] "+r" (result)
  428. : [i] "r" (i), [scale] "r" (scale)
  429. : "r0", "r1"
  430. );
  431. return result;
  432. #endif
  433. #else
  434. #error "No implementation for scale16by8 available."
  435. #endif
  436. }
  437. /// scale a 16-bit unsigned value by a 16-bit value,
  438. /// considered as numerator of a fraction whose denominator
  439. /// is 65536. In other words, it computes i * (scale / 65536)
  440. LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
  441. {
  442. #if SCALE16_C == 1
  443. uint16_t result;
  444. #if FASTLED_SCALE8_FIXED == 1
  445. result = ((uint32_t)(i) * (1+(uint32_t)(scale))) / 65536;
  446. #else
  447. result = ((uint32_t)(i) * (uint32_t)(scale)) / 65536;
  448. #endif
  449. return result;
  450. #elif SCALE16_AVRASM == 1
  451. #if FASTLED_SCALE8_FIXED == 1
  452. // implemented sort of like
  453. // result = ((i * scale) + i ) / 65536
  454. //
  455. // why not like this, you may ask?
  456. // result = (i * (scale+1)) / 65536
  457. // the answer is that if scale is 65535, then scale+1
  458. // will be zero, which is not what we want.
  459. uint32_t result;
  460. asm volatile(
  461. // result.A-B = i.A x scale.A
  462. " mul %A[i], %A[scale] \n\t"
  463. // save results...
  464. // basic idea:
  465. //" mov %A[result], r0 \n\t"
  466. //" mov %B[result], r1 \n\t"
  467. // which can be written as...
  468. " movw %A[result], r0 \n\t"
  469. // Because we're going to add i.A-B to
  470. // result.A-D, we DO need to keep both
  471. // the r0 and r1 portions of the product
  472. // UNlike in the 'unfixed scale8' version.
  473. // So the movw here is needed.
  474. : [result] "=r" (result)
  475. : [i] "r" (i),
  476. [scale] "r" (scale)
  477. : "r0", "r1"
  478. );
  479. asm volatile(
  480. // result.C-D = i.B x scale.B
  481. " mul %B[i], %B[scale] \n\t"
  482. //" mov %C[result], r0 \n\t"
  483. //" mov %D[result], r1 \n\t"
  484. " movw %C[result], r0 \n\t"
  485. : [result] "+r" (result)
  486. : [i] "r" (i),
  487. [scale] "r" (scale)
  488. : "r0", "r1"
  489. );
  490. const uint8_t zero = 0;
  491. asm volatile(
  492. // result.B-D += i.B x scale.A
  493. " mul %B[i], %A[scale] \n\t"
  494. " add %B[result], r0 \n\t"
  495. " adc %C[result], r1 \n\t"
  496. " adc %D[result], %[zero] \n\t"
  497. // result.B-D += i.A x scale.B
  498. " mul %A[i], %B[scale] \n\t"
  499. " add %B[result], r0 \n\t"
  500. " adc %C[result], r1 \n\t"
  501. " adc %D[result], %[zero] \n\t"
  502. // cleanup r1
  503. " clr r1 \n\t"
  504. : [result] "+r" (result)
  505. : [i] "r" (i),
  506. [scale] "r" (scale),
  507. [zero] "r" (zero)
  508. : "r0", "r1"
  509. );
  510. asm volatile(
  511. // result.A-D += i.A-B
  512. " add %A[result], %A[i] \n\t"
  513. " adc %B[result], %B[i] \n\t"
  514. " adc %C[result], %[zero] \n\t"
  515. " adc %D[result], %[zero] \n\t"
  516. : [result] "+r" (result)
  517. : [i] "r" (i),
  518. [zero] "r" (zero)
  519. );
  520. result = result >> 16;
  521. return result;
  522. #else
  523. uint32_t result;
  524. asm volatile(
  525. // result.A-B = i.A x scale.A
  526. " mul %A[i], %A[scale] \n\t"
  527. // save results...
  528. // basic idea:
  529. //" mov %A[result], r0 \n\t"
  530. //" mov %B[result], r1 \n\t"
  531. // which can be written as...
  532. " movw %A[result], r0 \n\t"
  533. // We actually don't need to do anything with r0,
  534. // as result.A is never used again here, so we
  535. // could just move the high byte, but movw is
  536. // one clock cycle, just like mov, so might as
  537. // well, in case we want to use this code for
  538. // a generic 16x16 multiply somewhere.
  539. : [result] "=r" (result)
  540. : [i] "r" (i),
  541. [scale] "r" (scale)
  542. : "r0", "r1"
  543. );
  544. asm volatile(
  545. // result.C-D = i.B x scale.B
  546. " mul %B[i], %B[scale] \n\t"
  547. //" mov %C[result], r0 \n\t"
  548. //" mov %D[result], r1 \n\t"
  549. " movw %C[result], r0 \n\t"
  550. : [result] "+r" (result)
  551. : [i] "r" (i),
  552. [scale] "r" (scale)
  553. : "r0", "r1"
  554. );
  555. const uint8_t zero = 0;
  556. asm volatile(
  557. // result.B-D += i.B x scale.A
  558. " mul %B[i], %A[scale] \n\t"
  559. " add %B[result], r0 \n\t"
  560. " adc %C[result], r1 \n\t"
  561. " adc %D[result], %[zero] \n\t"
  562. // result.B-D += i.A x scale.B
  563. " mul %A[i], %B[scale] \n\t"
  564. " add %B[result], r0 \n\t"
  565. " adc %C[result], r1 \n\t"
  566. " adc %D[result], %[zero] \n\t"
  567. // cleanup r1
  568. " clr r1 \n\t"
  569. : [result] "+r" (result)
  570. : [i] "r" (i),
  571. [scale] "r" (scale),
  572. [zero] "r" (zero)
  573. : "r0", "r1"
  574. );
  575. result = result >> 16;
  576. return result;
  577. #endif
  578. #else
  579. #error "No implementation for scale16 available."
  580. #endif
  581. }
  ///@}
  ///@defgroup Dimming Dimming and brightening functions
  ///
  /// Dimming and brightening functions
  ///
  /// The eye does not respond in a linear way to light.
  /// High speed PWM'd LEDs at 50% duty cycle appear far
  /// brighter than the 'half as bright' you might expect.
  ///
  /// If you want your midpoint brightness level (128) to
  /// appear half as bright as 'full' brightness (255), you
  /// have to apply a 'dimming function'.
  ///@{
  595. /// Adjust a scaling value for dimming
  596. LIB8STATIC uint8_t dim8_raw( uint8_t x)
  597. {
  598. return scale8( x, x);
  599. }
  600. /// Adjust a scaling value for dimming for video (value will never go below 1)
  601. LIB8STATIC uint8_t dim8_video( uint8_t x)
  602. {
  603. return scale8_video( x, x);
  604. }
  605. /// Linear version of the dimming function that halves for values < 128
  606. LIB8STATIC uint8_t dim8_lin( uint8_t x )
  607. {
  608. if( x & 0x80 ) {
  609. x = scale8( x, x);
  610. } else {
  611. x += 1;
  612. x /= 2;
  613. }
  614. return x;
  615. }
  616. /// inverse of the dimming function, brighten a value
  617. LIB8STATIC uint8_t brighten8_raw( uint8_t x)
  618. {
  619. uint8_t ix = 255 - x;
  620. return 255 - scale8( ix, ix);
  621. }
  622. /// inverse of the dimming function, brighten a value
  623. LIB8STATIC uint8_t brighten8_video( uint8_t x)
  624. {
  625. uint8_t ix = 255 - x;
  626. return 255 - scale8_video( ix, ix);
  627. }
  628. /// inverse of the dimming function, brighten a value
  629. LIB8STATIC uint8_t brighten8_lin( uint8_t x )
  630. {
  631. uint8_t ix = 255 - x;
  632. if( ix & 0x80 ) {
  633. ix = scale8( ix, ix);
  634. } else {
  635. ix += 1;
  636. ix /= 2;
  637. }
  638. return 255 - ix;
  639. }
  640. ///@}
  641. #endif