Переглянути джерело

rewrite AudioAnalyzeRMS - arm math version doesn't work!

dds
PaulStoffregen 9 роки тому
джерело
коміт
4b27e4d420
3 змінених файлів з 64 додано та 38 видалено
  1. +44
    -18
      analyze_rms.cpp
  2. +6
    -20
      analyze_rms.h
  3. +14
    -0
      utility/dspinst.h

+ 44
- 18
analyze_rms.cpp Переглянути файл

@@ -26,27 +26,53 @@


#include "analyze_rms.h"
#include <arm_math.h>
#include "utility/dspinst.h"

/*
 * Audio update callback: folds one incoming block into the running sum of
 * squares.
 *
 * Fetches the next input block (returning immediately when there is none),
 * adds the square of every sample in it to `accum`, bumps `count` by one
 * block and releases the block. The mean, square root and scaling are all
 * deferred to read().
 */
void AudioAnalyzeRMS::update(void)
{
	audio_block_t *block = receiveReadOnly();
	if (!block) return;
#if 1
	// Packed path: every 32-bit word carries two 16-bit samples, and the
	// helper squares both halves and adds them to the 64-bit sum.
	// NOTE(review): assumes block->data is 32-bit aligned and that
	// AUDIO_BLOCK_SAMPLES is a multiple of 8 -- the loop consumes four
	// words per pass and only tests the bound afterwards. Confirm.
	uint32_t *p = (uint32_t *)(block->data);
	uint32_t *end = p + AUDIO_BLOCK_SAMPLES/2;
	int64_t sum = accum;
	do {
		uint32_t n1 = *p++;
		uint32_t n2 = *p++;
		uint32_t n3 = *p++;
		uint32_t n4 = *p++;
		sum = multiply_accumulate_16tx16t_add_16bx16b(sum, n1, n1);
		sum = multiply_accumulate_16tx16t_add_16bx16b(sum, n2, n2);
		sum = multiply_accumulate_16tx16t_add_16bx16b(sum, n3, n3);
		sum = multiply_accumulate_16tx16t_add_16bx16b(sum, n4, n4);
	} while (p < end);
	accum = sum;
	count++;
#else
	// Plain reference path: one sample at a time.
	int16_t *p = block->data;
	int16_t *end = p + AUDIO_BLOCK_SAMPLES;
	int64_t sum = accum;
	do {
		int32_t n = *p++;
		sum += n * n;
	} while (p < end);
	accum = sum;
	count++;
#endif
	release(block);
}

/*
 * Returns the RMS level of everything accumulated by update() since the
 * previous call, scaled by 1/32767, and resets the accumulator.
 *
 * Returns 0 when no block has been accumulated yet.
 */
float AudioAnalyzeRMS::read(void)
{
	// Take and clear the accumulator with interrupts masked so that the
	// 64-bit sum and the block count are captured as a matching pair.
	__disable_irq();
	int64_t sum = accum;
	accum = 0;
	uint32_t num = count;
	count = 0;
	__enable_irq();
	// Nothing accumulated: the mean is undefined, so avoid dividing by zero.
	if (num == 0) return 0.0f;
	// Divide in floating point: an integer division would truncate the mean
	// square, and the integer product num * AUDIO_BLOCK_SAMPLES could wrap.
	float meansq = (float)sum / ((float)num * (float)AUDIO_BLOCK_SAMPLES);
	// TODO: shift down to 32 bits and use sqrt_uint32
	//       but is that really any more efficient?
	return sqrtf(meansq) / 32767.0f;
}


+ 6
- 20
analyze_rms.h Переглянути файл

@@ -33,32 +33,18 @@ class AudioAnalyzeRMS : public AudioStream
{
private:
audio_block_t *inputQueueArray[1];
volatile bool new_output;
int16_t lastRMS;
int64_t accum;
uint32_t count;

public:
AudioAnalyzeRMS(void) : AudioStream(1, inputQueueArray) {
lastRMS = 0;
accum = 0;
count = 0;
}

bool available(void) {
__disable_irq();
bool flag = new_output; // we don't reset new_output here, because if you don't read it,
//it'll still be available on the next call of available()
// (different from AnalyzePeak behavior, which resets it in available())
__enable_irq();
return flag;
}

float read(void) {
__disable_irq();
int rms = lastRMS;
new_output = false; // we can always set the new_output to false, even if it was false already
__enable_irq();
return rms / 32767.0;
return count > 0;
}
float read(void);
virtual void update(void);
};


+ 14
- 0
utility/dspinst.h Переглянути файл

@@ -274,6 +274,20 @@ static inline int32_t multiply_16tx16b_add_16bx16t(uint32_t a, uint32_t b)
return out;
}

// computes sum += ((a[15:0] * b[15:0]) + (a[31:16] * b[31:16]))
// Each 16-bit half is treated as a signed value; both products are added to
// the 64-bit accumulator, and the updated accumulator is returned.
static inline int64_t multiply_accumulate_16tx16t_add_16bx16b(int64_t sum, uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int64_t multiply_accumulate_16tx16t_add_16bx16b(int64_t sum, uint32_t a, uint32_t b)
{
#if defined(__ARM_ARCH_7EM__)
	// SMLALD: dual signed 16x16 multiply with 64-bit accumulate.
	// %Q0 / %R0 select the low / high word of the 64-bit operand.
	asm volatile("smlald %Q0, %R0, %1, %2" : "+r" (sum) : "r" (a), "r" (b));
	return sum;
#else
	// Portable equivalent for targets without the DSP instruction.
	// Products are widened to 64 bits so that two 2^30 terms cannot overflow.
	int64_t bottom = (int64_t)(int16_t)(a & 0xFFFFu) * (int16_t)(b & 0xFFFFu);
	int64_t top = (int64_t)(int16_t)(a >> 16) * (int16_t)(b >> 16);
	return sum + bottom + top;
#endif
}

// computes sum += ((a[15:0] * b[31:16]) + (a[31:16] * b[15:0]))
// Each 16-bit half is treated as a signed value; the two cross products are
// added to the 64-bit accumulator, and the updated accumulator is returned.
static inline int64_t multiply_accumulate_16tx16b_add_16bx16t(int64_t sum, uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int64_t multiply_accumulate_16tx16b_add_16bx16t(int64_t sum, uint32_t a, uint32_t b)
{
#if defined(__ARM_ARCH_7EM__)
	// SMLALDX: as SMLALD, but with the halves of the second operand exchanged.
	// %Q0 / %R0 select the low / high word of the 64-bit operand.
	asm volatile("smlaldx %Q0, %R0, %1, %2" : "+r" (sum) : "r" (a), "r" (b));
	return sum;
#else
	// Portable equivalent for targets without the DSP instruction.
	// Products are widened to 64 bits so that two 2^30 terms cannot overflow.
	int64_t bottom_top = (int64_t)(int16_t)(a & 0xFFFFu) * (int16_t)(b >> 16);
	int64_t top_bottom = (int64_t)(int16_t)(a >> 16) * (int16_t)(b & 0xFFFFu);
	return sum + bottom_top + top_bottom;
#endif
}

// computes (a[15:0] * b[15:0])
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_16bx16b(uint32_t a, uint32_t b)

Завантаження…
Відмінити
Зберегти