Shifting both sides by 16 bits gives pretty good accuracy with fast code:
@ Compiler output for the new 32-bit computation
@   cycles = ((F_CPU_ACTUAL>>16) * nsec) / (1000000000UL>>16)
@ On entry r0 holds nsec; the result (cycles) is left in r0.
@ Only the arithmetic is shown; the rest of the function is not part of this listing.
movw r3, #9155          @ r3 = 9155 (equals 600000000 >> 16, so presumably built with F_CPU_ACTUAL = 600 MHz -- confirm)
ldr r2, .L2             @ r2 = 144122641, the reciprocal constant used for the division below
mul r0, r3, r0          @ r0 = 9155 * nsec (low 32 bits of the product)
@ The next three instructions divide by 15258 (= 1000000000 >> 16 = 2 * 7629)
@ without a divide instruction: x / 15258 == ((x >> 1) * 144122641) >> 40.
lsrs r0, r0, #1         @ r0 >>= 1, takes care of the factor 2 in 15258
umull r3, r0, r2, r0    @ r0 = high 32 bits of r2 * r0, i.e. (r2 * r0) >> 32; low half in r3 is discarded
lsrs r0, r0, #8         @ a further >> 8 makes the total shift 40 bits -> r0 = cycles
.L2:
.word 144122641         @ ceil(2^40 / 7629), literal-pool constant loaded by the ldr above
With 984 MHz it allows nsec to be at most ~143000, which is plenty.
teensy4-core
| static inline void delayNanoseconds(uint32_t nsec) | static inline void delayNanoseconds(uint32_t nsec) | ||||
| { | { | ||||
| uint32_t begin = ARM_DWT_CYCCNT; | uint32_t begin = ARM_DWT_CYCCNT; | ||||
| uint32_t cycles = ((uint64_t)F_CPU_ACTUAL * nsec) / 1000000000UL; | |||||
| uint32_t cycles = ((F_CPU_ACTUAL>>16) * nsec) / (1000000000UL>>16); | |||||
| while (ARM_DWT_CYCCNT - begin < cycles) ; // wait | while (ARM_DWT_CYCCNT - begin < cycles) ; // wait | ||||
| } | } | ||||