преди 9 години · 771cfdc9e7
--- a/AudioTuner.cpp
+++ b/AudioTuner.cpp
@@ -22,164 +22,136 @@

 #include "AudioTuner.h"
 #include "utility/dspinst.h"
 #include "arm_math.h"

 /*
  * Exact sample rate seen by the tuner. SAMPLE_RATE selects a divisor
  * (1, 2 or 4) that is applied to the Audio Library's exact rate.
  * Every expansion is parenthesized so the macro behaves as a single
  * value no matter which operators surround it at the point of use.
  */
 #if SAMPLE_RATE == SAMPLE_RATE_44100
    #define SAMPLE_RATE_EXACT ( AUDIO_SAMPLE_RATE_EXACT / 1 )
 #elif SAMPLE_RATE == SAMPLE_RATE_22050
    #define SAMPLE_RATE_EXACT ( AUDIO_SAMPLE_RATE_EXACT / 2 )
 #elif SAMPLE_RATE == SAMPLE_RATE_11025
    #define SAMPLE_RATE_EXACT ( AUDIO_SAMPLE_RATE_EXACT / 4 )
 #endif

 /*
  * HALF_BUFFER: half of NUM_SAMPLES.
  * HALF_BLOCKS: AUDIO_BLOCKS * 64 samples.
  * Parenthesized so that e.g. `x / HALF_BUFFER` divides by the whole
  * value instead of being parsed as `x / NUM_SAMPLES / 2`.
  */
 #define HALF_BUFFER ( NUM_SAMPLES / 2 )
 #define HALF_BLOCKS ( AUDIO_BLOCKS * 64 )

 /*
  * Token-repetition helpers: LOOPn(a) expands to n back-to-back copies
  * of `a`. Larger counts are built by doubling the previous one.
  */
 #define LOOP1(a)   a
 #define LOOP2(a)   LOOP1(a) LOOP1(a)
 #define LOOP3(a)   LOOP2(a) LOOP1(a)
 #define LOOP4(a)   LOOP2(a) LOOP2(a)
 #define LOOP8(a)   LOOP4(a) LOOP4(a)
 #define LOOP16(a)  LOOP8(a) LOOP8(a)
 #define LOOP32(a)  LOOP16(a) LOOP16(a)
 #define LOOP64(a)  LOOP32(a) LOOP32(a)
 /*
  * UNROLL(n, a) selects LOOPn by token pasting, so n has to be one of
  * the literal counts defined above (1, 2, 3, 4, 8, 16, 32, 64).
  */
 #define UNROLL(n,a) LOOP##n(a)

 /**
 *  Audio update function.
 */
 // Copies AUDIO_BLOCK_SAMPLES 16-bit words from `source` to `destination`.
 // Both pointers are treated as arrays of uint16_t; no bounds are checked,
 // so each side must have room for at least AUDIO_BLOCK_SAMPLES elements.
 static void copy_buffer(void *destination, const void *source) {
    uint16_t *out = ( uint16_t * )destination;
    const uint16_t *in = ( const uint16_t * )source;
    for ( int idx = 0; idx < AUDIO_BLOCK_SAMPLES; ++idx ) {
        out[idx] = in[idx];
    }
 }

 void AudioTuner::update( void ) {
    
    audio_block_t *block;
    const int16_t *p, *end;
    block = receiveReadOnly( );
    
    if ( !block ) return;
    block = receiveReadOnly();
    if (!block) return;
    
    if ( !enabled ) {
        release( block );
        return;
    }
    
    p = block->data;
    end = p + AUDIO_BLOCK_SAMPLES;
    
    /*
     * Double buffering, one fills while the other is processed
     * 2x the throughput.
    */
    uint16_t *dst;
    bool next = next_buffer;
    if ( next ) {
        //digitalWriteFast(6, HIGH);
        dst = ( uint16_t * )buffer;
    digitalWriteFast(2, HIGH);
    if ( next_buffer ) {
        blocklist1[state++] = block;
        if ( !first_run && process_buffer ) process( );
    } else {
        blocklist2[state++] = block;
        if ( !first_run && process_buffer ) process( );
    }
    else {
       //digitalWriteFast(6, LOW);
       dst = ( uint16_t * )buffer + NUM_SAMPLES;
    
    if ( state >= AUDIO_BLOCKS ) {
        if ( next_buffer ) {
            if ( !first_run && process_buffer ) process( );
            for ( int i = 0; i < AUDIO_BLOCKS; i++ ) copy_buffer( AudioBuffer+( i * 0x80 ), blocklist1[i]->data );
            for ( int i = 0; i < AUDIO_BLOCKS; i++ ) release(blocklist1[i] );
        } else {
            if ( !first_run && process_buffer ) process( );
            for ( int i = 0; i < AUDIO_BLOCKS; i++ ) copy_buffer( AudioBuffer+( i * 0x80 ), blocklist2[i]->data );
            for ( int i = 0; i < AUDIO_BLOCKS; i++ ) release( blocklist2[i] );
        }
        process_buffer = true;
        first_run = false;
        state = 0;
        //digitalWriteFast(LED_BUILTIN, !digitalReadFast(LED_BUILTIN));
    }
 }

 FASTRUN void AudioTuner::process( void ) {
    //digitalWriteFast(0, HIGH);
    
    // gather data/and release block
    uint16_t count = count_global;
    const int16_t *p;
    p = AudioBuffer;
    
    uint16_t cycles = 64;;
    uint16_t tau = tau_global;
    do {
        *( dst+count++ ) = *( uint16_t * )p;
        p += SAMPLE_RATE;
    } while ( p < end );
    release( block );
        uint16_t x   = 0;
        int64_t  sum = 0;
        //uint32_t res;
        do {
            /*int16_t current1, lag1, current2, lag2;
             int32_t val1, val2;
             lag1 = *( ( uint32_t * )p + ( x + tau ) );
             current1 = *( ( uint32_t * )p + x );
             x += 32;
             lag2 = *( ( uint32_t * )p + ( x + tau ) );
             current2 = *( ( uint32_t * )p + x );
             val1 = __PKHBT(current1, current2, 0x10);
             val2 = __PKHBT(lag1, lag2, 0x10);
             res = __SSUB16( val1, val2 );
             sum = __SMLALD(res, res, sum);
             //sum = __SMLSLD(delta1, delta2, sum);*/
            int16_t current, lag, delta;
            //UNROLL(16,
                   lag = *( ( int16_t * )p + ( x+tau ) );
                   current = *( ( int16_t * )p+x );
                   delta = ( current-lag );
                   sum += delta * delta;
 #if F_CPU == 144000000
                   x += 8;
 #elif F_CPU == 120000000
                   x += 12;
 #elif F_CPU == 96000000
                   x += 16;
 #elif F_CPU < 96000000
                   x += 32;
 #endif
                   //);
        } while ( x <= HALF_BLOCKS );

        running_sum += sum;
        yin_buffer[yin_idx] = sum*tau;
        rs_buffer[yin_idx] = running_sum;
        yin_idx = ( ++yin_idx >= 5 ) ? 0 : yin_idx;
        tau = estimate( yin_buffer, rs_buffer, yin_idx, tau );

        if ( tau == 0 ) {
            process_buffer  = false;
            new_output      = true;
            yin_idx         = 1;
            running_sum     = 0;
            tau_global      = 1;
            //digitalWriteFast(2, LOW);
            //digitalWriteFast(0, LOW);
            return;
        }
    } while ( --cycles );
    
    /* 
     * If buffer full switch to start filling next
     * buffer and process the just filled buffer.
     */
    if ( count >= NUM_SAMPLES ) {
        //digitalWriteFast(2, !digitalReadFast(2));
        __disable_irq();
        next_buffer = !next_buffer;
        process_buffer  = true;
        count_global    = 0;
        tau_global      = 1;
    if ( tau >= HALF_BLOCKS ) {
        process_buffer  = false;
        new_output      = false;
        yin_idx         = 1;
        running_sum     = 0;
        count           = 0;
        __enable_irq();
    }
    count_global = count;// update global count
    
    /*
     * Set the number of cycles to be processed per receiving block.
     */
    uint16_t cycles;
    const uint16_t usage_max = cpu_usage_max;
    if ( AudioProcessorUsage( ) > usage_max ) {
 #if NUM_SAMPLES >= 8192
        cycles = tau_global + 2;
 #elif NUM_SAMPLES == 4096
        cycles = tau_global + 4;
 #elif NUM_SAMPLES == 2048
        cycles = tau_global + 8;
 #elif NUM_SAMPLES <= 1024
        cycles = tau_global + 32;
 #endif
    }
    else {
 #if NUM_SAMPLES >= 8192
        cycles = tau_global + 8;
 #elif NUM_SAMPLES == 4096
        cycles = tau_global + 16;
 #elif NUM_SAMPLES == 2048
        cycles = tau_global + 32;
 #elif NUM_SAMPLES <= 1024
        cycles = tau_global + 64;
 #endif
    }
    
    if ( process_buffer ) {
        //digitalWriteFast(0, HIGH);
        uint16_t tau;
        next = next_buffer;
        tau = tau_global;
        do {
            int64_t sum  = 0;
            const int16_t *end, *buf;
            if ( next ) {
                //digitalWriteFast(4, LOW);
                buf = buffer + NUM_SAMPLES;
            }
            else {
                //digitalWriteFast(4, HIGH);
                buf = buffer;
            }
            end = buf + HALF_BUFFER;
            
            // TODO: How to make faster?
            do {
                int16_t current, lag, delta;
                UNROLL( 8,
                       lag = *( buf + tau );
                       current = *buf++;
                       delta = current - lag;
                       //sum = multiply_accumulate_32x32_rshift32_rounded(sum, delta, delta);
                       sum += delta*delta;
                       );
            } while ( buf < end );
            
            running_sum += sum;
            yin_buffer[yin_idx] = sum*tau;
            rs_buffer[yin_idx] = running_sum;
            yin_idx = ( ++yin_idx >= 5 ) ? 0 : yin_idx;
            
            tau = estimate( yin_buffer, rs_buffer, yin_idx, tau );
            
            if ( tau == 0 ) {
                process_buffer  = false;
                new_output      = true;
                //digitalWriteFast(0, LOW);
                return;
            }
            else if ( tau >= HALF_BUFFER ) {
                process_buffer  = false;
                new_output      = false;
                //digitalWriteFast(0, LOW);
                return;
            }
            
        } while ( tau <= cycles );
        tau_global = tau;
        tau_global      = 1;
        //digitalWriteFast(0, LOW);
        return;
    }
    tau_global = tau;
    //digitalWriteFast(0, LOW);
 }

 /**
@@ -193,9 +165,10 @@ void AudioTuner::update( void ) {
 *  @return tau
 */
 uint16_t AudioTuner::estimate( int64_t *yin, int64_t *rs, uint16_t head, uint16_t tau ) {
    const int64_t *p = ( int64_t * )yin;
    const int64_t *y = ( int64_t * )yin;
    const int64_t *r = ( int64_t * )rs;
    uint16_t _tau, _head;
    const float thresh = yin_threshold;
    _tau = tau;
    _head = head;
    
@@ -209,19 +182,16 @@ uint16_t AudioTuner::estimate( int64_t *yin, int64_t *rs, uint16_t head, uint16_
        idx2 = ( idx2 >= 5 ) ? 0 : idx2;
        
        float s0, s1, s2;
        s0 = ( ( float )*( p+idx0 ) / *( r+idx0 ) );
        s1 = ( ( float )*( p+idx1 ) / *( r+idx1 ) );
        s2 = ( ( float )*( p+idx2 ) / *( r+idx2 ) );
        s0 = ( ( float )*( y+idx0 ) / *( r+idx0 ) );
        s1 = ( ( float )*( y+idx1 ) / *( r+idx1 ) );
        s2 = ( ( float )*( y+idx2 ) / *( r+idx2 ) );
        
        if ( s1 < yin_threshold && s1 < s2 ) {
        if ( s1 < thresh && s1 < s2 ) {
            uint16_t period = _tau - 3;
            periodicity = 1 - s1;
            data = period + 0.5f * ( s0 - s2 ) / ( s0 - 2.0f * s1 + s2 );
            return 0;
        }
        
        //if ( s1 > 2.4 ) return _tau + 2;
        //else return _tau + 1;
    }
    return _tau + 1;
 }
@@ -232,18 +202,19 @@ uint16_t AudioTuner::estimate( int64_t *yin, int64_t *rs, uint16_t head, uint16_
 *  @param threshold Allowed uncertainty
 *  @param cpu_max   How much cpu usage before throttling
 */
 void AudioTuner::initialize( float threshold, float cpu_max ) {
 void AudioTuner::initialize( float threshold ) {
    __disable_irq( );
    cpu_usage_max = cpu_max*100;
    yin_threshold = threshold;
    process_buffer = false;
    yin_threshold  = threshold;
    periodicity    = 0.0f;
    next_buffer    = true;
    running_sum    = 0;
    count_global   = 0;
    tau_global     = 1;
    first_run      = true;
    yin_idx        = 1;
    data           = 0;
    enabled        = true;
    state          = 0;
    data           = 0.0f;
    __enable_irq( );
 }

@@ -269,7 +240,7 @@ float AudioTuner::read( void ) {
    __disable_irq( );
    float d = data;
    __enable_irq( );
    return SAMPLE_RATE_EXACT / d;
    return AUDIO_SAMPLE_RATE_EXACT / d;
 }

 /**
--- a/AudioTuner.h
+++ b/AudioTuner.h
@@ -24,62 +24,46 @@
 #define AudioTuner_h_

 #include "AudioStream.h"
 /****************************************************************/
 #define SAMPLE_RATE_44100  1      // 44100    sample rate
 #define SAMPLE_RATE_22050  2      // 22050    sample rate
 #define SAMPLE_RATE_11025  4      // 11025    sample rate
 /****************************************************************/

 /****************************************************************
 *              Safe to adjust these values below               *
 *                                                              *
 *  These two parameters define how this object works.          *
 *  This parameter defines the size of the buffer.              *
 *                                                              *
 *  1.  NUM_SAMPLES - Size of the buffer. Since object uses     *
 *      double buffering this value will be 4x in bytes of      *
 *      memory.  !!! Must be power of 2 !!!!                    *
 *  1.  AUDIO_BLOCKS -  Buffer size is 128 * AUDIO_BLOCKS.      *
 *                      The more AUDIO_BLOCKS the lower the     *
 *                      frequency you can detect. The default   *
 *                      (24) is set to measure down to 29.14    *
 *                      Hz or B(flat)0.                         *
 *                                                              *
 *  2.  SAMPLE_RATE - Just what it says.                        *
 *                                                              *
 *  These two parameters work hand in hand. For example if you  *
 *  want a high sample rate but do not allocate enough buffer   *
 *  space, you will limit how low of a frequency you can        *
 *  measure. If you then increase the buffer you use up         *
 *  precious ram and slow down the system since it takes longer *
 *  to process the buffer.                                      *
 *                                                              *
 *  Play around with these values to find what best suits your  *
 *  needs. The max number of buffers you can have is 8192 bins. *
 ****************************************************************/
 // !!! Must be power of 2 !!!!
 #define NUM_SAMPLES 2048 // make a power of two

 // Use defined sample rates above^
 #define SAMPLE_RATE SAMPLE_RATE_22050
 #define AUDIO_BLOCKS  24
 /****************************************************************/

 class AudioTuner : public AudioStream
 {
 class AudioTuner : public AudioStream {
 public:
    /**
     *  constructor to setup Audio Library and initialize
     *
     *  @return none
     */
    AudioTuner( void ) : AudioStream( 1, inputQueueArray ), enabled( false ), new_output(false) {}
    AudioTuner( void ) : AudioStream( 1, inputQueueArray ), enabled( false ), new_output(false) {
    
    }
    
    /**
     *  initialize variables and start conversion
     *
     *  @param threshold Allowed uncertainty
     *  @param cpu_max   How much cpu usage before throttling
     *
     *  @return none
     */
    void initialize( float threshold, float cpu_max);
    void initialize( float threshold );
    
    /**
     *  sets threshold value
     *
     *  @param thresh
     *  @return none
     */
    void threshold( float p );
    
@@ -105,9 +89,11 @@ public:
    
    /**
     *  Audio Library calls this update function every ~2.9ms
     *
     *  @return none
     */
    virtual void update( void );
    

 private:
    /**
     *  check the sampled data for fundamental frequency
@@ -121,14 +107,26 @@ private:
     */
    uint16_t estimate( int64_t *yin, int64_t *rs, uint16_t head, uint16_t tau );
    
    int16_t  buffer[NUM_SAMPLES*2] __attribute__ ( ( aligned ( 4 ) ) );
    float    periodicity, yin_threshold, data, cpu_usage_max;
    int64_t  rs_buffer[5], yin_buffer[5];
    /**
     *  process audio data
     *
     *  @return none
     */
    void process( void );
    
    /**
     *  Variables
     */
    uint64_t running_sum;
    uint16_t tau_global, count_global, tau_cycles;
    uint8_t  yin_idx;
    bool     enabled, process_buffer, next_buffer;
    volatile bool new_output;
    uint16_t tau_global;
    int64_t  rs_buffer[5], yin_buffer[5];
    int16_t  AudioBuffer[AUDIO_BLOCKS*128] __attribute__ ( ( aligned ( 4 ) ) );
    uint8_t  yin_idx, state;
    float    periodicity, yin_threshold, cpu_usage_max, data;
    bool     enabled, next_buffer, first_run;
    volatile bool new_output, process_buffer;
    audio_block_t *blocklist1[AUDIO_BLOCKS];
    audio_block_t *blocklist2[AUDIO_BLOCKS];
    audio_block_t *inputQueueArray[1];
 };
 #endif
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <p align="center">
    <b>Guitar and Bass Tuner Library v2.2</b><br>
    <b>Guitar and Bass Tuner Library v2.3</b><br>
    <b>Teensy 3.1/2</b><br>
 </p>

@@ -40,46 +40,28 @@
                                                       *---<\ P /
                                                             \_/

 >Many optimizations have been done to the [YIN] algorithm for frequencies between 29-360Hz. 
 >Many optimizations have been done to the [YIN] algorithm for frequencies between 29-400Hz. 
 >>While it's still using a brute force method ( n<sup>2</sup> ) for finding the fundamental frequency f<sub>o</sub>, it is tuned to skip certain <b>tau</b> (<img src="http://latex.numberempire.com/render?%5Cinline%20%5Chuge%20%5Cmathbf%7B%5Ctau%7D&sig=845639da85c0dd8e2de679817b06639c"/></img>) values and focus mostly on frequencies found in the bass and guitar. 
 >>>The input is double buffered so while you are processing one buffer it is filling the other to double throughput. 
 >>>>There are a few parameters that can be adjusted to "dial in" the algorithm for better estimations located in AudioTuner.h. The defaults below are what I found that have the best trade off for speed and accuracy.
 >>>>The parameter AUDIO_BLOCKS below can be adjusted, but I found its default of 24 works best for the guitar and bass frequency range (29-400 Hz). 
 >>>>Looking into finding the Auto Correlation using FFT and IFFT to speed up processing of data! Not that simple because the YIN algorithm uses a squared difference tweak to the Auto Correlation.

 <h4>AudioTuner.h</h4>

 ```
 /****************************************************************/
 #define SAMPLE_RATE_44100  1      // 44100    sample rate
 #define SAMPLE_RATE_22050  2      // 22050    sample rate
 #define SAMPLE_RATE_11025  4      // 11025    sample rate
 /****************************************************************/

 /****************************************************************
 *              Safe to adjust these values below               *
 *                                                              *
 *  These two parameters define how this object works.          *
 *  This parameter defines the size of the buffer.              *
 *                                                              *
 *  1.  NUM_SAMPLES - Size of the buffer. Since object uses     *
 *      double buffering this value will be 4x in bytes of      *
 *      memory.  !!! Must be power of 2 !!!!                    *
 *  1.  AUDIO_BLOCKS -  Buffer size is 128 * AUDIO_BLOCKS.      *
 *                      The more AUDIO_BLOCKS the lower the     *
 *                      frequency you can detect. The default   *
 *                      (24) is set to measure down to 29.14    *
 *                      Hz or B(flat)0.                         *
 *                                                              *
 *  2.  SAMPLE_RATE - Just what it says.                        *
 *                                                              *
 *  These two parameters work hand in hand. For example if you  *
 *  want a high sample rate but do not allocate enough buffer   *
 *  space, you will limit how low of a frequency you can        *
 *  measure. If you then increase the buffer you use up         *
 *  precious ram and slow down the system since it takes longer *
 *  to process the buffer.                                      *
 *                                                              *
 *  Play around with these values to find what best suits your  *
 *  needs. The max number of buffers you can have is 8192 bins. *
 ****************************************************************/
 // !!! Must be power of 2 !!!!
 #define NUM_SAMPLES 2048 // make a power of two

 // Use defined sample rates above^
 #define SAMPLE_RATE SAMPLE_RATE_22050
 #define AUDIO_BLOCKS  24
 /****************************************************************/
 ```

@@ -94,4 +76,5 @@
 </ol>
 </div>

 [YIN]:http://recherche.ircam.fr/equipes/pcm/cheveign/pss/2002_JASA_YIN.pdf
 [YIN]:http://recherche.ircam.fr/equipes/pcm/cheveign/pss/2002_JASA_YIN.pdf
 [Teensy Audio Library]:http://www.pjrc.com/teensy/td_libs_Audio.html
--- a/examples/Sample_Guitar_Tunning_Notes/Sample_Guitar_Tunning_Notes.ino
+++ b/examples/Sample_Guitar_Tunning_Notes/Sample_Guitar_Tunning_Notes.ino
@@ -57,16 +57,12 @@ void playNote(void) {
 }
 //---------------------------------------------------------------------------------------
 void setup() {
    AudioMemory(4);
    AudioMemory(30);
    /*
     *  Initialize the yin algorithm's absolute
     *  threshold, this is a good number.
     *
     *  Percent of overall current cpu usage used
     *  before making the search algorithm less
     *  aggressive (0.0 - 1.0).
     */
    tuner.initialize(.15, .99);
    tuner.initialize(.15);
    pinMode(LED_BUILTIN, OUTPUT);
    playNoteTimer.begin(playNote, 1000);
 }
--- a/examples/Simple_Sine/Simple_Sine.ino
+++ b/examples/Simple_Sine/Simple_Sine.ino
@@ -40,16 +40,12 @@ AudioConnection patchCord3(mixer, 0, dac, 0);
 char buffer[10];

 void setup() {
    AudioMemory(4);
    AudioMemory(30);
    /*
     *  Initialize the yin algorithm's absolute
     *  threshold, this is a good number.
     *
     *  Percent of overall current cpu usage used 
     *  before making the search algorithm less
     *  aggressive (0.0 - 1.0).
     */
    tuner.initialize(.15, .99);
    tuner.initialize(.15);
    
    sine.frequency(30.87);
    sine.amplitude(1);
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=AudioTuner
 version=2.2
 version=2.3
 author=Colin Duffy
 maintainer=Colin Duffy
 sentence=Yin algorithm
--- a/revision.md
+++ b/revision.md
@@ -1,3 +1,7 @@
 ><b>Updated (11/23/15 v2.3)</b><br>
 * Totally new method to gather and process data, data is available after 24 Blocks of data have been collected (~69.6ms) for all frequencies.<br>
 * Double buffer to collect Audio data, while one collects the other buffer is processed.<br>

 ><b>Updated (10/12/15 v2.2)</b><br>
 * Fixed yin cpu usage throttling code in update function.<br>
 * Function initialize second param takes a float (0.0 - 1.0).<br>