12 лет назад · 2a63896a70
--- a/Audio.cpp
+++ b/Audio.cpp
 	return out;
 }
 // computes (((int64_t)a[31:0] * (int64_t)b[31:0]) >> 32)
 // Signed 32x32 multiply that keeps only the upper 32 bits of the 64-bit
 // product, done with a single ARM SMMUL instruction.  The low word is
 // simply dropped (no rounding).
 static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline));
 static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
 {
 	int32_t out;
 	// smmul Rd, Rn, Rm  ->  out = top word of (a * b)
 	asm volatile("smmul %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
 	return out;
 }
 // computes (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
 // Same as multiply_32x32_rshift32, but uses the rounding form of the
 // instruction (SMMULR): half of the discarded low word (0x80000000) is
 // added to the 64-bit product before the upper 32 bits are taken.
 static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline));
 static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
 {
 	int32_t out;
 	// smmulr Rd, Rn, Rm  ->  out = top word of (a * b + 0x80000000)
 	asm volatile("smmulr %0, %1, %2" : "=r" (out) : "r" (a), "r" (b));
 	return out;
 }
 // computes sum + (((int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
 // Rounded multiply-accumulate on the upper product word, done with a single
 // ARM SMMLAR instruction.  `sum` is the accumulator; `a` and `b` are the
 // two factors.
 static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline));
 static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
 {
 	int32_t out;
 	// smmlar Rd, Rn, Rm, Ra: operands are passed as (out, a, b, sum), so the
 	// accumulator is the LAST assembler operand even though it is the first
 	// C++ parameter.
 	asm volatile("smmlar %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
 	return out;
 }
 // computes ((((int64_t)sum << 32) - (int64_t)a[31:0] * (int64_t)b[31:0] + 0x80000000) >> 32)
 // Rounded multiply-subtract on the upper product word, done with a single
 // ARM SMMLSR instruction.  This is sum minus the rounded upper word of
 // (a * b), except that the rounding constant is applied to the difference:
 // when the low word of the product is exactly 0x80000000 the result is one
 // greater than "sum - multiply_32x32_rshift32_rounded(a, b)".
 static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline));
 static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
 {
 	int32_t out;
 	// smmlsr Rd, Rn, Rm, Ra: operands are passed as (out, a, b, sum), so the
 	// minuend `sum` is the LAST assembler operand even though it is the first
 	// C++ parameter.
 	asm volatile("smmlsr %0, %2, %3, %1" : "=r" (out) : "r" (sum), "r" (a), "r" (b));
 	return out;
 }
 // computes ((a[15:0] << 16) | b[15:0])
 static inline uint32_t pack_16x16(int32_t a, int32_t b) __attribute__((always_inline));
 /******************************************************************/
 // computes (((int64_t)a[31:0] * (int64_t)b[31:0]) >> 30), keeping the low
 // 32 bits of the shifted result.  Written in plain C++ (no inline assembly).
 static inline int32_t multiply_32x32_rshift30(int32_t a, int32_t b) __attribute__((always_inline));
 static inline int32_t multiply_32x32_rshift30(int32_t a, int32_t b)
 {
 	const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
 	const int64_t shifted = product >> 30;
 	return static_cast<int32_t>(shifted);
 }
 //#define TONE_DETECT_FAST
 void AudioAnalyzeToneDetect::update(void)
 {
 	audio_block_t *block;
 	int32_t q0, q1, q2, coef;
 	const int16_t *p, *end;
 	uint16_t n;
 	block = receiveReadOnly();
 	if (!block) return;
 	if (!enabled) {
 		release(block);
 		return;
 	}
 	p = block->data;
 	end = p + AUDIO_BLOCK_SAMPLES;
 	n = count;
 	coef = coefficient;
 	q1 = s1;
 	q2 = s2;
 	do {
 		// the Goertzel algorithm is kinda magical ;-)
 #ifdef TONE_DETECT_FAST
 		q0 = (*p++) + (multiply_32x32_rshift32_rounded(coef, q1) << 2) - q2;
 #else
 		q0 = (*p++) + multiply_32x32_rshift30(coef, q1) - q2;
 		// TODO: is this only 1 cycle slower?  if so, always use it
 #endif
 		q2 = q1;
 		q1 = q0;
 		if (--n == 0) {
 			out1 = q1;
 			out2 = q2;
 			q1 = 0;  // TODO: does clearing these help or hinder?
 			q2 = 0;
 			new_output = true;
 			n = length;
 		}
 	} while (p < end);
 	count = n;
 	s1 = q1;
 	s2 = q2;
 	release(block);
 }
 // Installs a new detector configuration and restarts the analysis.
 //   coef   - Goertzel coefficient, stored unchanged in `coefficient`
 //   cycles - number of waveform cycles being sought, stored in `ncycles`
 //   len    - number of samples per analysis window; 0 is raised to 1
 // The Goertzel state (s1, s2) is cleared, the sample countdown is reloaded
 // and the detector is marked enabled.  All fields are written with
 // interrupts disabled, presumably so that an update() running from interrupt
 // context never observes a half-applied configuration.
 // NOTE: interrupts are unconditionally re-enabled on exit.
 void AudioAnalyzeToneDetect::set_params(int32_t coef, uint16_t cycles, uint16_t len)
 {
 	// A zero-sample window is never meaningful: update() pre-decrements its
 	// 16-bit countdown, so starting from 0 would wrap around to 65535.
 	if (len == 0) len = 1;
 	__disable_irq();
 	coefficient = coef;
 	ncycles = cycles;
 	length = len;
 	count = len;
 	s1 = 0;
 	s2 = 0;
 	enabled = true;
 	__enable_irq();
 }
 // Returns the tone level computed from the most recently published Goertzel
 // state (out1/out2): the square root of the Goertzel power divided by the
 // window length.  (Presumably this normalizes a full-scale tone to roughly
 // 1.0 -- TODO confirm.)
 // Returns 0 when the window length is zero or the computed power is not
 // positive, instead of producing NaN or infinity.
 float AudioAnalyzeToneDetect::read(void)
 {
 	int32_t coef, q1, q2;
 	uint16_t len;
 	float power;
 	// Take a consistent snapshot of the values written by update().
 	__disable_irq();
 	coef = coefficient;
 	q1 = out1;
 	q2 = out2;
 	len = length;
 	__enable_irq();
 	// power = q1^2 + q2^2 - q1*q2*coef, with fixed-point scaling
 #ifdef TONE_DETECT_FAST
 	int32_t power32;
 	power32 = multiply_32x32_rshift32_rounded(q2, q2);
 	power32 = multiply_accumulate_32x32_rshift32_rounded(power32, q1, q1);
 	power32 = multiply_subtract_32x32_rshift32_rounded(power32,
 		multiply_32x32_rshift30(q1, q2), coef);
 	power32 <<= 4;
 	power = (float)power32;
 #else
 	int64_t power64;
 	power64 = (int64_t)q2 * (int64_t)q2;
 	power64 += (int64_t)q1 * (int64_t)q1;
 	power64 -= (((int64_t)q1 * (int64_t)q2) >> 30) * (int64_t)coef;
 	// Convert straight from 64 bits: (power64 >> 28) can exceed the int32_t
 	// range for long windows with strong tones, so it must not be narrowed
 	// to 32 bits first.
 	power = (float)(power64 >> 28);
 #endif
 	// sqrtf() of a negative value is NaN and a zero length would divide by
 	// zero; report "no tone" in both cases.
 	if (len == 0 || !(power > 0.0f)) return 0.0f;
 	return sqrtf(power) / (float)len;
 }
 // True when the Goertzel power of the most recently published state
 // (out1/out2) is at or above the configured threshold, false when below.
 // The comparison is made on squared quantities: power against
 // ((length * thresh)^2 >> 32), which avoids a square root.
 AudioAnalyzeToneDetect::operator bool()
 {
 	int32_t coef, q1, q2;
 	int64_t power, trigger;
 	uint32_t scaled;
 	uint16_t len;
 	// Take a consistent snapshot of the values written by update().
 	__disable_irq();
 	coef = coefficient;
 	q1 = out1;
 	q2 = out2;
 	len = length;
 	__enable_irq();
 	// power = q1^2 + q2^2 - q1*q2*coef, with fixed-point scaling
 #ifdef TONE_DETECT_FAST
 	int32_t power32;
 	power32 = multiply_32x32_rshift32_rounded(q2, q2);
 	power32 = multiply_accumulate_32x32_rshift32_rounded(power32, q1, q1);
 	power32 = multiply_subtract_32x32_rshift32_rounded(power32,
 		multiply_32x32_rshift30(q1, q2), coef);
 	power32 <<= 4;
 	power = power32;
 #else
 	int64_t power64;
 	power64 = (int64_t)q2 * (int64_t)q2;
 	power64 += (int64_t)q1 * (int64_t)q1;
 	power64 -= (((int64_t)q1 * (int64_t)q2) >> 30) * (int64_t)coef;
 	// Keep all 64 bits: (power64 >> 28) can exceed the int32_t range.
 	power = power64 >> 28;
 #endif
 	// len * thresh can need all 32 unsigned bits (up to 65535 * 65535), so it
 	// has to be squared as an unsigned quantity; squaring it as a signed
 	// 32-bit value gives a wrong trigger once the product reaches 2^31.
 	scaled = (uint32_t)len * thresh;
 	trigger = (int64_t)(((uint64_t)scaled * (uint64_t)scaled) >> 32);
 	return (power >= trigger);
 }
 /******************************************************************/
--- a/Audio.h
+++ b/Audio.h
 // Single-frequency tone detector based on the Goertzel algorithm.
 // Configure it with frequency() (or set_params() directly), then poll
 // available()/read() for the measured level, or test the object as a bool
 // to compare the level against threshold().
 class AudioAnalyzeToneDetect : public AudioStream
 {
 public:
 	// Starts disabled with a threshold of 6554 (about 10% of 65536).  Every
 	// member is given a defined value so that available(), read() and
 	// operator bool() are well-behaved before the first frequency() call;
 	// length/count start at 1 so no zero-length window is ever visible.
 	AudioAnalyzeToneDetect(void)
 	  : AudioStream(1, inputQueueArray),
 	    coefficient(0), s1(0), s2(0), out1(0), out2(0),
 	    length(1), count(1), ncycles(0),
 	    thresh(6554), enabled(false), new_output(false) { }
 	// Selects the tone to look for.
 	//   freq   - tone frequency, in the same units as AUDIO_SAMPLE_RATE_EXACT
 	//   cycles - how many cycles of the tone make up one analysis window
 	// The window length is sample_rate / freq * cycles, rounded to nearest
 	// and limited to 1..65535 samples.
 	void frequency(float freq, uint16_t cycles=10) {
 		int32_t coef = (int32_t)(cos((double)freq
 		  * (2.0 * 3.14159265358979323846 / AUDIO_SAMPLE_RATE_EXACT))
 		  * (double)2147483647.999);
 		float samples = (float)AUDIO_SAMPLE_RATE_EXACT / freq * (float)cycles + 0.5f;
 		// Converting an out-of-range float to uint16_t is undefined, and a
 		// zero-length window is meaningless, so clamp first.  The negated
 		// comparison also catches NaN (e.g. freq == 0 with cycles == 0).
 		if (!(samples >= 1.0f)) samples = 1.0f;
 		else if (samples > 65535.0f) samples = 65535.0f;
 		set_params(coef, cycles, (uint16_t)samples);
 	}
 	// Low-level configuration: raw Goertzel coefficient, cycle count and
 	// window length in samples.
 	void set_params(int32_t coef, uint16_t cycles, uint16_t len);
 	// Returns true once for each newly completed analysis window; the
 	// new_output flag is tested and cleared with interrupts disabled.
 	bool available(void) {
 		__disable_irq();
 		bool flag = new_output;
 		if (flag) new_output = false;
 		__enable_irq();
 		return flag;
 	}
 	// Level of the tone in the most recently completed window.
 	float read(void);
 	// Sets the detection threshold used by operator bool().  `level` is a
 	// fraction of full scale; values below 0.01 or above 0.99 are pinned to
 	// 655 and 64881 respectively, otherwise it is scaled by 65536 and rounded.
 	void threshold(float level) {
 		if (level < 0.01f) thresh = 655;
 		else if (level > 0.99f) thresh = 64881;
 		else thresh = level * 65536.0f + 0.5f;
 	}
 	operator bool();  // true if at or above threshold, false if below
 	virtual void update(void);
 private:
 	int32_t coefficient;	// Goertzel algorithm coefficient
 	int32_t s1, s2;		// Goertzel algorithm state
 	int32_t out1, out2;	// Goertzel algorithm state output
 	uint16_t length;	// number of samples to analyze
 	uint16_t count;		// how many left to analyze
 	uint16_t ncycles;	// number of waveform cycles to seek
 	uint16_t thresh;	// threshold, 655 to 64881 (1% to 99%)
 	bool enabled;		// false until parameters have been set
 	volatile bool new_output;	// set by update(), cleared by available()
 	audio_block_t *inputQueueArray[1];
 };
 // TODO: more audio processing objects....
 //  N-channel mixer, adjustable gain on each channel
 //  sine wave with frequency modulation (phase)
 //  non-sine oscillators, ramp, triangle, square/pulse, etc
 //  waveforms with bandwidth limited tables for synth
 //  envelope: attack-decay-sustain-release, maybe other more complex?
 //  filters, low pass, high pass, bandpass, notch
 //  frequency analysis - FFT, single frequency (eg, filter for DTMF)
 //  MP3 decoding - is it possible with optimized code?
 //  other decompression, ADPCM, Vorbis, Speex, etc?