Shifting both the numerator and the denominator right by 16 bits gives pretty good accuracy with fast code:
movw r3, #9155
ldr r2, .L2
mul r0, r3, r0
lsrs r0, r0, #1
umull r3, r0, r2, r0
lsrs r0, r0, #8
.L2:
.word 144122641
At 984 MHz, this allows nsec to be at most ~143000, which is plenty.
teensy4-core
| @@ -758,7 +758,7 @@ static inline void delayNanoseconds(uint32_t) __attribute__((always_inline, unus | |||
| static inline void delayNanoseconds(uint32_t nsec) | |||
| { | |||
| uint32_t begin = ARM_DWT_CYCCNT; | |||
| uint32_t cycles = ((uint64_t)F_CPU_ACTUAL * nsec) / 1000000000UL; | |||
| uint32_t cycles = ((F_CPU_ACTUAL>>16) * nsec) / (1000000000UL>>16); | |||
| while (ARM_DWT_CYCCNT - begin < cycles) ; // wait | |||
| } | |||