Browse Source

Port DMAChannel.h to Teensy-LC

main
PaulStoffregen 10 years ago
parent
commit
d232e9a3aa
3 changed files with 608 additions and 270 deletions
  1. +86
    -2
      teensy3/DMAChannel.cpp
  2. +518
    -268
      teensy3/DMAChannel.h
  3. +4
    -0
      teensy3/kinetis.h

+ 86
- 2
teensy3/DMAChannel.cpp View File

@@ -1,11 +1,17 @@
#include "DMAChannel.h"

#if defined(KINETISK)

// The channel allocation bitmask is accessible from "C" namespace,
// so C-only code can reserve DMA channels.
// Bit n is set while DMA channel n is allocated: DMAChannel::begin()
// sets the bit of the channel it claims and DMAChannel::release()
// clears it again, both with interrupts disabled.
uint16_t dma_channel_allocated_mask = 0;



/****************************************************************/
/** Teensy 3.0 & 3.1 **/
/****************************************************************/
#if defined(KINETISK)

void DMAChannel::begin(bool force_initialization)
{
uint32_t ch = 0;
@@ -84,6 +90,85 @@ static void swap(DMAChannel &c1, DMAChannel &c2)
c2.TCD = t;
}

/****************************************************************/
/** Teensy-LC **/
/****************************************************************/
#elif defined(KINETISL)


// Allocate a hardware DMA channel for this object (Teensy-LC).
//
// force_initialization: when false, an object that already owns a
//   channel (valid channel number, its bit set in
//   dma_channel_allocated_mask, and CFG pointing at that channel's
//   registers) is left untouched.  When true, a channel is always
//   claimed and initialized.
//
// If every channel is taken, CFG becomes a null pointer and channel is
// set to DMA_NUM_CHANNELS; any later use of the object will hardfault.
void DMAChannel::begin(bool force_initialization)
{
	__disable_irq();

	// The ownership test must be done in this order: channel is only
	// used as a shift count / address offset after it has been range
	// checked, because the members may still hold arbitrary values.
	const bool already_allocated = !force_initialization && CFG
		&& channel < DMA_NUM_CHANNELS
		&& (dma_channel_allocated_mask & (1 << channel))
		&& (uint32_t)CFG == (uint32_t)(0x40008100 + channel * 16);
	if (already_allocated) {
		__enable_irq();
		return;
	}

	// Claim the lowest numbered channel whose mask bit is still clear.
	uint32_t slot = 0;
	bool claimed = false;
	do {
		const uint32_t bit = 1 << slot;
		if ((dma_channel_allocated_mask & bit) == 0) {
			dma_channel_allocated_mask |= bit;
			claimed = true;
		}
	} while (!claimed && ++slot < DMA_NUM_CHANNELS);
	__enable_irq();

	if (!claimed) {
		// no more channels available
		// attempts to use this object will hardfault
		CFG = (CFG_t *)0;
		channel = DMA_NUM_CHANNELS;
		return;
	}

	channel = slot;
	// Turn on the clock gates for the DMA controller and the DMA mux
	SIM_SCGC7 |= SIM_SCGC7_DMA;
	SIM_SCGC6 |= SIM_SCGC6_DMAMUX;
	// Each channel has a 16 byte register block starting at 0x40008100
	CFG = (CFG_t *)(0x40008100 + slot * 16);
	// Put the channel into a known state
	CFG->DSR_BCR = DMA_DSR_BCR_DONE;
	CFG->DCR = DMA_DCR_CS;
	CFG->SAR = NULL;
	CFG->DAR = NULL;
}

// Give this object's DMA channel back to the pool (Teensy-LC).
//
// Does nothing when the object holds no channel (channel is not below
// DMA_NUM_CHANNELS).  Otherwise DMA_DSR_BCR_DONE is written to the
// channel's DSR_BCR register, the channel's bit is cleared in
// dma_channel_allocated_mask with interrupts disabled, and the object
// is marked as unallocated.
void DMAChannel::release(void)
{
	if (channel >= DMA_NUM_CHANNELS) return;
	CFG->DSR_BCR = DMA_DSR_BCR_DONE;
	__disable_irq();
	dma_channel_allocated_mask &= ~(1 << channel);
	__enable_irq();
	// Use the same "no channel" marker that begin() stores when it
	// fails to allocate, rather than a hard-coded number.
	channel = DMA_NUM_CHANNELS;
	CFG = (CFG_t *)0;
}

// Rank a channel for DMAPriorityOrder(): a larger result means higher
// priority.  The result is 3 minus the channel number, so channel 0
// ranks highest.
static uint32_t priority(const DMAChannel &c)
{
	const uint32_t channel_number = c.channel;
	return 3 - channel_number;
}

// Exchange the hardware channels held by two DMAChannel objects: both
// the channel number and the CFG register pointer trade places.
static void swap(DMAChannel &c1, DMAChannel &c2)
{
	const uint8_t first_channel = c1.channel;
	DMABaseClass::CFG_t * const first_cfg = c1.CFG;

	c1.channel = c2.channel;
	c1.CFG = c2.CFG;

	c2.channel = first_channel;
	c2.CFG = first_cfg;
}




#endif




void DMAPriorityOrder(DMAChannel &ch1, DMAChannel &ch2)
{
if (priority(ch1) < priority(ch2)) swap(ch1, ch2);
@@ -106,4 +191,3 @@ void DMAPriorityOrder(DMAChannel &ch1, DMAChannel &ch2, DMAChannel &ch3, DMAChan
if (priority(ch3) < priority(ch4)) swap(ch2, ch3);
}

#endif

+ 518
- 268
teensy3/DMAChannel.h View File

@@ -3,10 +3,25 @@

#include "kinetis.h"

// This code is a work-in-progress. It's incomplete and not usable yet...
//
// Discussion about DMAChannel is here:
// http://forum.pjrc.com/threads/25778-Could-there-be-something-like-an-ISR-template-function/page3

#define DMACHANNEL_HAS_BEGIN
#define DMACHANNEL_HAS_BOOLEAN_CTOR


// The channel allocation bitmask is accessible from "C" namespace,
// so C-only code can reserve DMA channels
#ifdef __cplusplus
extern "C" {
#endif
extern uint16_t dma_channel_allocated_mask;
#ifdef __cplusplus
}
#endif


#ifdef __cplusplus

// known libraries with DMA usage (in need of porting to this new scheme):
//
@@ -17,12 +32,12 @@
// https://github.com/pixelmatix/SmartMatrix
// https://github.com/crteensy/DmaSpi <-- DmaSpi has adopted this scheme


/****************************************************************/
/** Teensy 3.0 & 3.1 **/
/****************************************************************/
#if defined(KINETISK)

#ifdef __cplusplus

#define DMACHANNEL_HAS_BEGIN
#define DMACHANNEL_HAS_BOOLEAN_CTOR

class DMABaseClass {
public:
@@ -538,263 +553,500 @@ public:
// code, but direct control of all parameters is possible.
uint8_t channel;
// TCD is accessible due to inheritance from DMABaseClass
};

// arrange the relative priority of 2 or more DMA channels
//
// The 2 argument form swaps the hardware channels held by ch1 and ch2
// when ch1's channel has the lower priority, so that afterwards ch1's
// priority is greater than or equal to ch2's.
// NOTE(review): the 3 and 4 argument forms presumably apply the same
// ordering across all of their arguments - confirm in DMAChannel.cpp.
void DMAPriorityOrder(DMAChannel &ch1, DMAChannel &ch2);
void DMAPriorityOrder(DMAChannel &ch1, DMAChannel &ch2, DMAChannel &ch3);
void DMAPriorityOrder(DMAChannel &ch1, DMAChannel &ch2, DMAChannel &ch3, DMAChannel &ch4);

















/****************************************************************/
/** Teensy-LC **/
/****************************************************************/
#elif defined(KINETISL)


// DMABaseClass (Teensy-LC) provides the functions which edit one set of
// DMA settings, reached through the CFG pointer.  The inheriting class
// decides what CFG points at: DMASetting points it at a copy held in
// memory, DMAChannel points it at the registers of a hardware channel.
//
// Size encoding used with DMA_DCR_SSIZE() and DMA_DCR_DSIZE() in this
// class: 0 = 32 bit, 1 = 8 bit, 2 = 16 bit.
class DMABaseClass {
public:
	// One set of settings: source address, destination address,
	// status / byte count, and control.
	typedef struct __attribute__((packed)) {
		volatile const void * volatile SAR;
		volatile void * volatile DAR;
		volatile uint32_t DSR_BCR;
		volatile uint32_t DCR;
	} CFG_t;
	CFG_t *CFG;

	/***************************************/
	/**    Data Transfer                  **/
	/***************************************/

	// Use a single variable as the data source.  Typically a register
	// for receiving data from one of the hardware peripherals is used.
	// The source address is not incremented, and any source modulo
	// setting is cleared.
	void source(volatile const signed char &p) { source(*(volatile const uint8_t *)&p); }
	void source(volatile const unsigned char &p) {
		CFG->SAR = &p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(1);
	}
	void source(volatile const signed short &p) { source(*(volatile const uint16_t *)&p); }
	void source(volatile const unsigned short &p) {
		CFG->SAR = &p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(2);
	}
	void source(volatile const signed int &p) { source(*(volatile const uint32_t *)&p); }
	void source(volatile const unsigned int &p) { source(*(volatile const uint32_t *)&p); }
	void source(volatile const signed long &p) { source(*(volatile const uint32_t *)&p); }
	void source(volatile const unsigned long &p) {
		CFG->SAR = &p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(0);
	}

	// Use a buffer (array of data) as the data source.  Typically a
	// buffer for transmitting data is used.  len is written to the
	// byte count register; a len above 0xFFFFF is rejected and the
	// settings are left unchanged.
	void sourceBuffer(volatile const signed char p[], unsigned int len) {
		sourceBuffer((volatile const uint8_t *)p, len); }
	void sourceBuffer(volatile const unsigned char p[], unsigned int len) {
		if (len > 0xFFFFF) return;
		CFG->SAR = p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(1) | DMA_DCR_SINC;
		CFG->DSR_BCR = len;
	}
	void sourceBuffer(volatile const signed short p[], unsigned int len) {
		sourceBuffer((volatile const uint16_t *)p, len); }
	void sourceBuffer(volatile const unsigned short p[], unsigned int len) {
		if (len > 0xFFFFF) return;
		CFG->SAR = p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(2) | DMA_DCR_SINC;
		CFG->DSR_BCR = len;
	}
	void sourceBuffer(volatile const signed int p[], unsigned int len) {
		sourceBuffer((volatile const uint32_t *)p, len); }
	void sourceBuffer(volatile const unsigned int p[], unsigned int len) {
		sourceBuffer((volatile const uint32_t *)p, len); }
	void sourceBuffer(volatile const signed long p[], unsigned int len) {
		sourceBuffer((volatile const uint32_t *)p, len); }
	void sourceBuffer(volatile const unsigned long p[], unsigned int len) {
		if (len > 0xFFFFF) return;
		CFG->SAR = p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(0) | DMA_DCR_SINC;
		CFG->DSR_BCR = len;
	}

	// Use a circular buffer as the data source.  The modulo setting is
	// derived from len by len2mod(); a len below 16 is rejected and
	// the settings are left unchanged.
	void sourceCircular(volatile const signed char p[], unsigned int len) {
		sourceCircular((volatile const uint8_t *)p, len); }
	void sourceCircular(volatile const unsigned char p[], unsigned int len) {
		uint32_t mod = len2mod(len);
		if (mod == 0) return;
		CFG->SAR = p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(1) | DMA_DCR_SINC
			| DMA_DCR_SMOD(mod);
		CFG->DSR_BCR = len;
	}
	void sourceCircular(volatile const signed short p[], unsigned int len) {
		sourceCircular((volatile const uint16_t *)p, len); }
	void sourceCircular(volatile const unsigned short p[], unsigned int len) {
		uint32_t mod = len2mod(len);
		if (mod == 0) return;
		CFG->SAR = p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(2) | DMA_DCR_SINC
			| DMA_DCR_SMOD(mod);
		CFG->DSR_BCR = len;
	}
	void sourceCircular(volatile const signed int p[], unsigned int len) {
		sourceCircular((volatile const uint32_t *)p, len); }
	void sourceCircular(volatile const unsigned int p[], unsigned int len) {
		sourceCircular((volatile const uint32_t *)p, len); }
	void sourceCircular(volatile const signed long p[], unsigned int len) {
		sourceCircular((volatile const uint32_t *)p, len); }
	void sourceCircular(volatile const unsigned long p[], unsigned int len) {
		uint32_t mod = len2mod(len);
		if (mod == 0) return;
		CFG->SAR = p;
		CFG->DCR = (CFG->DCR & 0xF08E0FFF) | DMA_DCR_SSIZE(0) | DMA_DCR_SINC
			| DMA_DCR_SMOD(mod);
		CFG->DSR_BCR = len;
	}

	// Use a single variable as the data destination.  Typically a register
	// for transmitting data to one of the hardware peripherals is used.
	// The destination address is not incremented, and any destination
	// modulo setting is cleared.
	void destination(volatile signed char &p) { destination(*(volatile uint8_t *)&p); }
	void destination(volatile unsigned char &p) {
		CFG->DAR = &p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(1);
	}
	void destination(volatile signed short &p) { destination(*(volatile uint16_t *)&p); }
	void destination(volatile unsigned short &p) {
		CFG->DAR = &p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(2);
	}
	void destination(volatile signed int &p) { destination(*(volatile uint32_t *)&p); }
	void destination(volatile unsigned int &p) { destination(*(volatile uint32_t *)&p); }
	void destination(volatile signed long &p) { destination(*(volatile uint32_t *)&p); }
	void destination(volatile unsigned long &p) {
		CFG->DAR = &p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(0);
	}

	// Use a buffer (array of data) as the data destination.  Typically a
	// buffer for receiving data is used.  As with sourceBuffer(), a len
	// above 0xFFFFF is rejected and the settings are left unchanged.
	void destinationBuffer(volatile signed char p[], unsigned int len) {
		destinationBuffer((volatile uint8_t *)p, len); }
	void destinationBuffer(volatile unsigned char p[], unsigned int len) {
		if (len > 0xFFFFF) return;
		CFG->DAR = p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(1) | DMA_DCR_DINC;
		CFG->DSR_BCR = len;
	}
	void destinationBuffer(volatile signed short p[], unsigned int len) {
		destinationBuffer((volatile uint16_t *)p, len); }
	void destinationBuffer(volatile unsigned short p[], unsigned int len) {
		if (len > 0xFFFFF) return;
		CFG->DAR = p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(2) | DMA_DCR_DINC;
		CFG->DSR_BCR = len;
	}
	void destinationBuffer(volatile signed int p[], unsigned int len) {
		destinationBuffer((volatile uint32_t *)p, len); }
	void destinationBuffer(volatile unsigned int p[], unsigned int len) {
		destinationBuffer((volatile uint32_t *)p, len); }
	void destinationBuffer(volatile signed long p[], unsigned int len) {
		destinationBuffer((volatile uint32_t *)p, len); }
	void destinationBuffer(volatile unsigned long p[], unsigned int len) {
		if (len > 0xFFFFF) return;
		CFG->DAR = p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(0) | DMA_DCR_DINC;
		CFG->DSR_BCR = len;
	}

	// Use a circular buffer as the data destination.  The modulo setting
	// is derived from len by len2mod(); a len below 16 is rejected and
	// the settings are left unchanged.
	void destinationCircular(volatile signed char p[], unsigned int len) {
		destinationCircular((volatile uint8_t *)p, len); }
	void destinationCircular(volatile unsigned char p[], unsigned int len) {
		uint32_t mod = len2mod(len);
		if (mod == 0) return;
		CFG->DAR = p;
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(1) | DMA_DCR_DINC
			| DMA_DCR_DMOD(mod);
		CFG->DSR_BCR = len;
	}
	void destinationCircular(volatile signed short p[], unsigned int len) {
		destinationCircular((volatile uint16_t *)p, len); }
	void destinationCircular(volatile unsigned short p[], unsigned int len) {
		uint32_t mod = len2mod(len);
		if (mod == 0) return;
		CFG->DAR = p;
		// 16 bit destination, same encoding as destinationBuffer()
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(2) | DMA_DCR_DINC
			| DMA_DCR_DMOD(mod);
		CFG->DSR_BCR = len;
	}
	void destinationCircular(volatile signed int p[], unsigned int len) {
		destinationCircular((volatile uint32_t *)p, len); }
	void destinationCircular(volatile unsigned int p[], unsigned int len) {
		destinationCircular((volatile uint32_t *)p, len); }
	void destinationCircular(volatile signed long p[], unsigned int len) {
		destinationCircular((volatile uint32_t *)p, len); }
	void destinationCircular(volatile unsigned long p[], unsigned int len) {
		uint32_t mod = len2mod(len);
		if (mod == 0) return;
		CFG->DAR = p;
		// 32 bit destination, same encoding as destinationBuffer()
		CFG->DCR = (CFG->DCR & 0xF0F0F0FF) | DMA_DCR_DSIZE(0) | DMA_DCR_DINC
			| DMA_DCR_DMOD(mod);
		CFG->DSR_BCR = len;
	}

	/*************************************************/
	/**    Quantity of Data to Transfer             **/
	/*************************************************/

	// Set the data size used for each triggered transfer.  len is the
	// size in bytes: 4 selects 32 bit, 2 selects 16 bit, and any other
	// value selects 8 bit.  Both the source and the destination size
	// are set.
	void transferSize(unsigned int len) {
		uint32_t dcr = CFG->DCR & 0xF0C8FFFF;
		if (len == 4) {
			CFG->DCR = dcr | DMA_DCR_SSIZE(0) | DMA_DCR_DSIZE(0);
		} else if (len == 2) {
			CFG->DCR = dcr | DMA_DCR_SSIZE(2) | DMA_DCR_DSIZE(2);
		} else {
			CFG->DCR = dcr | DMA_DCR_SSIZE(1) | DMA_DCR_DSIZE(1);
		}
	}

	// Set the number of transfers (number of triggers until complete).
	// The hardware counts bytes, so the number of transfers is scaled
	// by the currently configured transfer size.
	void transferCount(unsigned int len) {
		// n is log2 of the transfer size in bytes:
		// 0 = 8 bit, 1 = 16 bit, 2 = 32 bit
		uint32_t s, d, n = 0;
		uint32_t dcr = CFG->DCR;
		s = (dcr >> 20) & 3;	// source size setting
		d = (dcr >> 17) & 3;	// destination size setting
		if (s == 0 || d == 0) n = 2;
		else if (s == 2 || d == 2) n = 1;
		CFG->DSR_BCR = len << n;
	}

	/*************************************************/
	/**    Special Options / Features               **/
	/*************************************************/

	// Request an interrupt when the transfer completes.
	void interruptAtCompletion(void) {
		CFG->DCR |= DMA_DCR_EINT;
	}

	// NOTE: the usage cases below are written against the DMA_TCDn_*
	// style registers, which are not the registers this class uses.
	/* usage cases:

	************************
	OctoWS2811:
	************************

	// enable clocks to the DMA controller and DMAMUX
	SIM_SCGC7 |= SIM_SCGC7_DMA;
	SIM_SCGC6 |= SIM_SCGC6_DMAMUX;
	DMA_CR = 0;
	DMA_CERQ = 1;
	DMA_CERQ = 2;
	DMA_CERQ = 3;

	// DMA channel #1 sets WS2811 high at the beginning of each cycle
	DMA_TCD1_SADDR = &ones;
	DMA_TCD1_SOFF = 0;
	DMA_TCD1_ATTR = DMA_TCD_ATTR_SSIZE(0) | DMA_TCD_ATTR_DSIZE(0);
	DMA_TCD1_NBYTES_MLNO = 1;
	DMA_TCD1_SLAST = 0;
	DMA_TCD1_DADDR = &GPIOD_PSOR;
	DMA_TCD1_DOFF = 0;
	DMA_TCD1_CITER_ELINKNO = bufsize;
	DMA_TCD1_DLASTSGA = 0;
	DMA_TCD1_CSR = DMA_TCD_CSR_DREQ;
	DMA_TCD1_BITER_ELINKNO = bufsize;
	dma1.source(ones);
	dma1.destination(GPIOD_PSOR);
	dma1.size(1);
	dma1.count(bufsize);
	dma1.disableOnCompletion();

	// DMA channel #2 writes the pixel data at 20% of the cycle
	DMA_TCD2_SADDR = frameBuffer;
	DMA_TCD2_SOFF = 1;
	DMA_TCD2_ATTR = DMA_TCD_ATTR_SSIZE(0) | DMA_TCD_ATTR_DSIZE(0);
	DMA_TCD2_NBYTES_MLNO = 1;
	DMA_TCD2_SLAST = -bufsize;
	DMA_TCD2_DADDR = &GPIOD_PDOR;
	DMA_TCD2_DOFF = 0;
	DMA_TCD2_CITER_ELINKNO = bufsize;
	DMA_TCD2_DLASTSGA = 0;
	DMA_TCD2_CSR = DMA_TCD_CSR_DREQ;
	DMA_TCD2_BITER_ELINKNO = bufsize;
	dma2.source(frameBuffer, sizeof(frameBuffer));
	dma2.destination(GPIOD_PDOR);
	dma2.size(1);
	dma2.count(bufsize);
	dma2.disableOnCompletion();

	// DMA channel #3 clear all the pins low at 48% of the cycle
	DMA_TCD3_SADDR = &ones;
	DMA_TCD3_SOFF = 0;
	DMA_TCD3_ATTR = DMA_TCD_ATTR_SSIZE(0) | DMA_TCD_ATTR_DSIZE(0);
	DMA_TCD3_NBYTES_MLNO = 1;
	DMA_TCD3_SLAST = 0;
	DMA_TCD3_DADDR = &GPIOD_PCOR;
	DMA_TCD3_DOFF = 0;
	DMA_TCD3_CITER_ELINKNO = bufsize;
	DMA_TCD3_DLASTSGA = 0;
	DMA_TCD3_CSR = DMA_TCD_CSR_DREQ | DMA_TCD_CSR_INTMAJOR;
	DMA_TCD3_BITER_ELINKNO = bufsize;
	dma3.source(ones);
	dma3.destination(GPIOD_PCOR);
	dma3.size(1);
	dma3.count(bufsize);
	dma3.disableOnCompletion();

	************************
	Audio, DAC
	************************

	DMA_CR = 0;
	DMA_TCD4_SADDR = dac_buffer;
	DMA_TCD4_SOFF = 2;
	DMA_TCD4_ATTR = DMA_TCD_ATTR_SSIZE(1) | DMA_TCD_ATTR_DSIZE(1);
	DMA_TCD4_NBYTES_MLNO = 2;
	DMA_TCD4_SLAST = -sizeof(dac_buffer);
	DMA_TCD4_DADDR = &DAC0_DAT0L;
	DMA_TCD4_DOFF = 0;
	DMA_TCD4_CITER_ELINKNO = sizeof(dac_buffer) / 2;
	DMA_TCD4_DLASTSGA = 0;
	DMA_TCD4_BITER_ELINKNO = sizeof(dac_buffer) / 2;
	DMA_TCD4_CSR = DMA_TCD_CSR_INTHALF | DMA_TCD_CSR_INTMAJOR;
	DMAMUX0_CHCFG4 = DMAMUX_DISABLE;
	DMAMUX0_CHCFG4 = DMAMUX_SOURCE_PDB | DMAMUX_ENABLE;

	************************
	Audio, I2S
	************************

	DMA_CR = 0;
	DMA_TCD0_SADDR = i2s_tx_buffer;
	DMA_TCD0_SOFF = 2;
	DMA_TCD0_ATTR = DMA_TCD_ATTR_SSIZE(1) | DMA_TCD_ATTR_DSIZE(1);
	DMA_TCD0_NBYTES_MLNO = 2;
	DMA_TCD0_SLAST = -sizeof(i2s_tx_buffer);
	DMA_TCD0_DADDR = &I2S0_TDR0;
	DMA_TCD0_DOFF = 0;
	DMA_TCD0_CITER_ELINKNO = sizeof(i2s_tx_buffer) / 2;
	DMA_TCD0_DLASTSGA = 0;
	DMA_TCD0_BITER_ELINKNO = sizeof(i2s_tx_buffer) / 2;
	DMA_TCD0_CSR = DMA_TCD_CSR_INTHALF | DMA_TCD_CSR_INTMAJOR;

	DMAMUX0_CHCFG0 = DMAMUX_DISABLE;
	DMAMUX0_CHCFG0 = DMAMUX_SOURCE_I2S0_TX | DMAMUX_ENABLE;

	************************
	ADC lib, Pedro Villanueva
	************************

	DMA_CR = 0; // normal mode of operation

	*DMAMUX0_CHCFG = DMAMUX_DISABLE; // disable before changing

	*DMA_TCD_ATTR = DMA_TCD_ATTR_SSIZE(DMA_TCD_ATTR_SIZE_16BIT) |
	DMA_TCD_ATTR_DSIZE(DMA_TCD_ATTR_SIZE_16BIT) |
	DMA_TCD_ATTR_DMOD(4); // src and dst data is 16 bit (2 bytes), buffer size 2^^4 bytes = 8 values
	*DMA_TCD_NBYTES_MLNO = 2; // Minor Byte Transfer Count 2 bytes = 16 bits (we transfer 2 bytes each minor loop)

	*DMA_TCD_SADDR = ADC_RA; // source address
	*DMA_TCD_SOFF = 0; // don't change the address when minor loop finishes
	*DMA_TCD_SLAST = 0; // don't change src address after major loop completes

	*DMA_TCD_DADDR = elems; // destination address
	*DMA_TCD_DOFF = 2; // increment 2 bytes each minor loop
	*DMA_TCD_DLASTSGA = 0; // modulus feature takes care of going back to first element

	*DMA_TCD_CITER_ELINKNO = 1; // Current Major Iteration Count with channel linking disabled
	*DMA_TCD_BITER_ELINKNO = 1; // Starting Major Iteration Count with channel linking disabled

	*DMA_TCD_CSR = DMA_TCD_CSR_INTMAJOR; // Control and status: interrupt when major counter is complete

	DMA_CERQ = DMA_CERQ_CERQ(DMA_channel); // clear all past request
	DMA_CINT = DMA_channel; // clear interrupts

	uint8_t DMAMUX_SOURCE_ADC = DMAMUX_SOURCE_ADC0;
	if(ADC_number==1){
	DMAMUX_SOURCE_ADC = DMAMUX_SOURCE_ADC1;
	}
	*DMAMUX0_CHCFG = DMAMUX_SOURCE_ADC | DMAMUX_ENABLE; // enable mux and set channel DMA_channel to ADC0

	DMA_SERQ = DMA_SERQ_SERQ(DMA_channel); // enable DMA request
	NVIC_ENABLE_IRQ(IRQ_DMA_CH); // enable interrupts

	************************
	SmartMatrix
	************************

	// enable minor loop mapping so addresses can get reset after minor loops
	DMA_CR = 1 << 7;

	// DMA channel #0 - on latch rising edge, read address from fixed address temporary buffer, and output address on GPIO
	// using combo of writes to set+clear registers, to only modify the address pins and not other GPIO pins
	// address temporary buffer is refreshed before each DMA trigger (by DMA channel #2)
	// only use single major loop, never disable channel
	#define ADDRESS_ARRAY_REGISTERS_TO_UPDATE 2
	DMA_TCD0_SADDR = &gpiosync.gpio_pcor;
	DMA_TCD0_SOFF = (int)&gpiosync.gpio_psor - (int)&gpiosync.gpio_pcor;
	DMA_TCD0_SLAST = (ADDRESS_ARRAY_REGISTERS_TO_UPDATE * ((int)&ADDX_GPIO_CLEAR_REGISTER - (int)&ADDX_GPIO_SET_REGISTER));
	DMA_TCD0_ATTR = DMA_TCD_ATTR_SSIZE(2) | DMA_TCD_ATTR_DSIZE(2);
	// Destination Minor Loop Offset Enabled - transfer appropriate number of bytes per minor loop, and put DADDR back to original value when minor loop is complete
	// Source Minor Loop Offset Enabled - source buffer is same size and offset as destination so values reset after each minor loop
	DMA_TCD0_NBYTES_MLOFFYES = DMA_TCD_NBYTES_SMLOE | DMA_TCD_NBYTES_DMLOE |
	((ADDRESS_ARRAY_REGISTERS_TO_UPDATE * ((int)&ADDX_GPIO_CLEAR_REGISTER - (int)&ADDX_GPIO_SET_REGISTER)) << 10) |
	(ADDRESS_ARRAY_REGISTERS_TO_UPDATE * sizeof(gpiosync.gpio_psor));
	// start on higher value of two registers, and make offset decrement to avoid negative number in NBYTES_MLOFFYES (TODO: can switch order by masking negative offset)
	DMA_TCD0_DADDR = &ADDX_GPIO_CLEAR_REGISTER;
	// update destination address so the second update per minor loop is ADDX_GPIO_SET_REGISTER
	DMA_TCD0_DOFF = (int)&ADDX_GPIO_SET_REGISTER - (int)&ADDX_GPIO_CLEAR_REGISTER;
	DMA_TCD0_DLASTSGA = (ADDRESS_ARRAY_REGISTERS_TO_UPDATE * ((int)&ADDX_GPIO_CLEAR_REGISTER - (int)&ADDX_GPIO_SET_REGISTER));
	// single major loop
	DMA_TCD0_CITER_ELINKNO = 1;
	DMA_TCD0_BITER_ELINKNO = 1;
	// link channel 1, enable major channel-to-channel linking, don't clear enable on major loop complete
	DMA_TCD0_CSR = (1 << 8) | (1 << 5);
	DMAMUX0_CHCFG0 = DMAMUX_SOURCE_LATCH_RISING_EDGE | DMAMUX_ENABLE;

	// DMA channel #1 - copy address values from current position in array to buffer to temporarily hold row values for the next timer cycle
	// only use single major loop, never disable channel
	DMA_TCD1_SADDR = &matrixUpdateBlocks[0][0].addressValues;
	DMA_TCD1_SOFF = sizeof(uint16_t);
	DMA_TCD1_SLAST = sizeof(matrixUpdateBlock) - (ADDRESS_ARRAY_REGISTERS_TO_UPDATE * sizeof(uint16_t));
	DMA_TCD1_ATTR = DMA_TCD_ATTR_SSIZE(1) | DMA_TCD_ATTR_DSIZE(1);
	// 16-bit = 2 bytes transferred
	// transfer two 16-bit values, reset destination address back after each minor loop
	DMA_TCD1_NBYTES_MLOFFNO = (ADDRESS_ARRAY_REGISTERS_TO_UPDATE * sizeof(uint16_t));
	// start with the register that's the highest location in memory and make offset decrement to avoid negative number in NBYTES_MLOFFYES register (TODO: can switch order by masking negative offset)
	DMA_TCD1_DADDR = &gpiosync.gpio_pcor;
	DMA_TCD1_DOFF = (int)&gpiosync.gpio_psor - (int)&gpiosync.gpio_pcor;
	DMA_TCD1_DLASTSGA = (ADDRESS_ARRAY_REGISTERS_TO_UPDATE * ((int)&gpiosync.gpio_pcor - (int)&gpiosync.gpio_psor));
	// no minor loop linking, single major loop, single minor loop, don't clear enable after major loop complete
	DMA_TCD1_CITER_ELINKNO = 1;
	DMA_TCD1_BITER_ELINKNO = 1;
	DMA_TCD1_CSR = 0;

	// DMA channel #2 - on latch falling edge, load FTM1_CV1 and FTM1_MOD with with next values from current block
	// only use single major loop, never disable channel
	// link to channel 3 when complete
	#define TIMER_REGISTERS_TO_UPDATE 2
	DMA_TCD2_SADDR = &matrixUpdateBlocks[0][0].timerValues.timer_oe;
	DMA_TCD2_SOFF = sizeof(uint16_t);
	DMA_TCD2_SLAST = sizeof(matrixUpdateBlock) - (TIMER_REGISTERS_TO_UPDATE * sizeof(uint16_t));
	DMA_TCD2_ATTR = DMA_TCD_ATTR_SSIZE(1) | DMA_TCD_ATTR_DSIZE(1);
	// 16-bit = 2 bytes transferred
	DMA_TCD2_NBYTES_MLOFFNO = TIMER_REGISTERS_TO_UPDATE * sizeof(uint16_t);
	DMA_TCD2_DADDR = &FTM1_C1V;
	DMA_TCD2_DOFF = (int)&FTM1_MOD - (int)&FTM1_C1V;
	DMA_TCD2_DLASTSGA = TIMER_REGISTERS_TO_UPDATE * ((int)&FTM1_C1V - (int)&FTM1_MOD);
	// no minor loop linking, single major loop
	DMA_TCD2_CITER_ELINKNO = 1;
	DMA_TCD2_BITER_ELINKNO = 1;
	// link channel 3, enable major channel-to-channel linking, don't clear enable after major loop complete
	DMA_TCD2_CSR = (3 << 8) | (1 << 5);
	DMAMUX0_CHCFG2 = DMAMUX_SOURCE_LATCH_FALLING_EDGE | DMAMUX_ENABLE;

	#define DMA_TCD_MLOFF_MASK (0x3FFFFC00)

	// DMA channel #3 - repeatedly load gpio_array into GPIOD_PDOR, stop and int on major loop complete
	DMA_TCD3_SADDR = matrixUpdateData[0][0];
	DMA_TCD3_SOFF = sizeof(matrixUpdateData[0][0]) / 2;
	// SADDR will get updated by ISR, no need to set SLAST
	DMA_TCD3_SLAST = 0;
	DMA_TCD3_ATTR = DMA_TCD_ATTR_SSIZE(0) | DMA_TCD_ATTR_DSIZE(0);
	// after each minor loop, set source to point back to the beginning of this set of data,
	// but advance by 1 byte to get the next significant bits data
	DMA_TCD3_NBYTES_MLOFFYES = DMA_TCD_NBYTES_SMLOE |
	(((1 - sizeof(matrixUpdateData[0])) << 10) & DMA_TCD_MLOFF_MASK) |
	(MATRIX_WIDTH * DMA_UPDATES_PER_CLOCK);
	DMA_TCD3_DADDR = &GPIOD_PDOR;
	DMA_TCD3_DOFF = 0;
	DMA_TCD3_DLASTSGA = 0;
	DMA_TCD3_CITER_ELINKNO = LATCHES_PER_ROW;
	DMA_TCD3_BITER_ELINKNO = LATCHES_PER_ROW;
	// int after major loop is complete
	DMA_TCD3_CSR = DMA_TCD_CSR_INTMAJOR;
	// for debugging - enable bandwidth control (space out GPIO updates so they can be seen easier on a low-bandwidth logic analyzer)
	//DMA_TCD3_CSR |= (0x02 << 14);

	// enable a done interrupt when all DMA operations are complete
	NVIC_ENABLE_IRQ(IRQ_DMA_CH3);

	// enable additional dma interrupt used as software interrupt
	NVIC_SET_PRIORITY(IRQ_DMA_CH1, 0xFF); // 0xFF = lowest priority
	NVIC_ENABLE_IRQ(IRQ_DMA_CH1);

	// enable channels 0, 1, 2, 3
	DMA_ERQ = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);

	// at the end after everything is set up: enable timer from system clock, with appropriate prescale
	FTM1_SC = FTM_SC_CLKS(1) | FTM_SC_PS(LATCH_TIMER_PRESCALE);


	*/
	// Sets DMA_DCR_D_REQ in the control register, so the channel is
	// disabled when its transfer completes.
	void disableOnCompletion(void) {
		CFG->DCR |= DMA_DCR_D_REQ;
	}

	// Kinetis-L DMA does not have these features :-(
	//
	// void interruptAtHalf(void) {}
	// void replaceSettingsOnCompletion(const DMABaseClass &settings) {};
	// TODO: can a 2nd linked channel be used to emulate this?

protected:
	// users should not be able to create instances of DMABaseClass, which
	// require the inheriting class to initialize the CFG pointer.
	DMABaseClass() {}

	// Copy all four settings words from src to dst, one at a time.
	static inline void copy_cfg(CFG_t *dst, const CFG_t *src) {
		dst->SAR = src->SAR;
		dst->DAR = src->DAR;
		dst->DSR_BCR = src->DSR_BCR;
		dst->DCR = src->DCR;
	}
private:
	// Convert a circular buffer length to the value given to
	// DMA_DCR_SMOD() / DMA_DCR_DMOD().  Returns 0 when len is below 16
	// (callers treat 0 as "cannot be used"), 1 for 16 to 31, 2 for 32
	// to 63, and so on, doubling at each step, up to 15 for 262144 or
	// more.
	static inline uint32_t len2mod(uint32_t len) {
		if (len < 16) return 0;
		if (len < 32) return 1;
		if (len < 64) return 2;
		if (len < 128) return 3;
		if (len < 256) return 4;
		if (len < 512) return 5;
		if (len < 1024) return 6;
		if (len < 2048) return 7;
		if (len < 4096) return 8;
		if (len < 8192) return 9;
		if (len < 16384) return 10;
		if (len < 32768) return 11;
		if (len < 65536) return 12;
		if (len < 131072) return 13;
		if (len < 262144) return 14;
		return 15;
	}
};


// DMASetting represents settings stored only in memory, which can be
// applied to any DMA channel.

class DMASetting : public DMABaseClass {
public:
DMASetting() {
cfgdata.SAR = NULL;
cfgdata.DAR = NULL;
cfgdata.DSR_BCR = 0;
cfgdata.DCR = DMA_DCR_CS;
CFG = &cfgdata;
}
DMASetting(const DMASetting &c) {
CFG = &cfgdata;
*this = c;
}
DMASetting(const DMABaseClass &c) {
CFG = &cfgdata;
*this = c;
}
DMASetting & operator = (const DMABaseClass &rhs) {
copy_cfg(CFG, rhs.CFG);
return *this;
}
private:
CFG_t cfgdata __attribute__((aligned(4)));
};


// DMAChannel represents an actual DMA channel and its current settings

// DMAChannel (Teensy-LC) ties a DMABaseClass to one hardware DMA
// channel: CFG points at that channel's registers and "channel" holds
// its number.
class DMAChannel : public DMABaseClass {
public:
	/*************************************************/
	/**    Channel Allocation                       **/
	/*************************************************/

	// Allocate a channel immediately.
	DMAChannel() {
		begin();
	}
	// Refer to the same hardware channel as c.  No new channel is
	// allocated.
	DMAChannel(const DMAChannel &c) {
		channel = c.channel;
		CFG = c.CFG;
	}
	// Allocate a channel and load it with the settings held in c.
	DMAChannel(const DMASetting &c) {
		begin();
		copy_cfg(CFG, c.CFG);
	}
	// Allocate a channel only when allocate is true.  When it is false
	// the constructor leaves CFG and channel exactly as they were.
	// NOTE(review): presumably this lets begin() be called on the
	// object before or after its constructor has run - confirm.
	DMAChannel(bool allocate) {
		if (!allocate) return;
		begin();
	}
	// Take over the channel used by rhs.  If both objects already use
	// the same channel number nothing happens; otherwise this object's
	// own channel is released first.
	DMAChannel & operator = (const DMAChannel &rhs) {
		if (channel == rhs.channel) return *this;
		release();
		CFG = rhs.CFG;
		channel = rhs.channel;
		return *this;
	}
	// Load the settings held in rhs into this channel.
	DMAChannel & operator = (const DMASetting &rhs) {
		copy_cfg(CFG, rhs.CFG);
		return *this;
	}
	// Release the channel when the object is destroyed.
	~DMAChannel() {
		release();
	}
	// Allocate a channel; defined in DMAChannel.cpp.
	void begin(bool force_initialization = false);
private:
	// Release the channel; defined in DMAChannel.cpp.
	void release(void);

public:
	/***************************************/
	/**    Triggering                     **/
	/***************************************/

	// Triggers cause the DMA channel to actually move data.  Each
	// trigger moves a single data unit, which is typically 8, 16 or
	// 32 bits.

	// Use a hardware trigger to make the DMA channel run.  The low
	// 6 bits of source select the trigger in this channel's DMAMUX
	// configuration register.
	void triggerAtHardwareEvent(uint8_t source) {
		// The mux configuration registers are one byte per channel,
		// starting at DMAMUX0_CHCFG0.
		volatile uint8_t * const mux_cfg =
			(volatile uint8_t *)&(DMAMUX0_CHCFG0) + channel;
		*mux_cfg = 0;	// clear the mux register before selecting
		*mux_cfg = (source & 63) | DMAMUX_ENABLE;
		CFG->DCR = CFG->DCR | (DMA_DCR_ERQ | DMA_DCR_CS);
	}

	// Use another DMA channel as the trigger, causing this
	// channel to trigger after each transfer it makes, except
	// its last transfer.  This effectively makes the 2
	// channels run in parallel until the last transfer.
	// The link is stored in the control register of ch.
	void triggerAtTransfersOf(DMABaseClass &ch) {
		const uint32_t other_dcr = ch.CFG->DCR;
		switch ((other_dcr >> 4) & 3) {	// current link mode of ch
		case 0:
		case 2:
			ch.CFG->DCR = (other_dcr & ~DMA_DCR_LCH1(3)) |
				DMA_DCR_LINKCC(2) | DMA_DCR_LCH1(channel);
			break;
		case 1:
			// link mode stays as it is, only LCH1 is replaced
			ch.CFG->DCR = (other_dcr & ~DMA_DCR_LCH1(3)) |
				DMA_DCR_LCH1(channel);
			break;
		default: {
			// link mode 3: the channel number currently in LCH1 is
			// moved to LCH2, and this channel takes LCH1
			const uint32_t previous_lch1 = (other_dcr >> 2) & 3;
			const uint32_t cleared = other_dcr
				& ~(DMA_DCR_LINKCC(3) | DMA_DCR_LCH2(3) | DMA_DCR_LCH1(3));
			ch.CFG->DCR = cleared
				| DMA_DCR_LINKCC(1) | DMA_DCR_LCH2(previous_lch1)
				| DMA_DCR_LCH1(channel);
			break;
		}
		}
	}

	// Use another DMA channel as the trigger, causing this
	// channel to trigger when the other channel completes.
	// The link is stored in the control register of ch.
	void triggerAtCompletionOf(DMABaseClass &ch) {
		const uint32_t other_dcr = ch.CFG->DCR;
		const uint32_t link_mode = (other_dcr >> 4) & 3;
		const bool use_lch1 = (link_mode == 0) || (link_mode == 3);
		if (use_lch1) {
			ch.CFG->DCR = (other_dcr & ~DMA_DCR_LCH1(3)) |
				DMA_DCR_LINKCC(3) | DMA_DCR_LCH1(channel);
			return;
		}
		// link modes 1 and 2: LCH1 is kept, this channel goes in LCH2
		ch.CFG->DCR = (other_dcr
			& ~(DMA_DCR_LINKCC(3) | DMA_DCR_LCH2(3)))
			| DMA_DCR_LINKCC(1) | DMA_DCR_LCH2(channel);
	}

	// Cause this DMA channel to be continuously triggered, so
	// it will move data as rapidly as possible, without waiting.
	// Normally this would be used with disableOnCompletion().
	void triggerContinuously(void) {
		const uint32_t dcr = CFG->DCR & ~(DMA_DCR_ERQ | DMA_DCR_CS);
		// two separate writes: first without START, then with it
		CFG->DCR = dcr;
		CFG->DCR = dcr | DMA_DCR_START;
	}

	// Manually trigger the DMA channel.
	void triggerManual(void) {
		uint32_t dcr = CFG->DCR;
		dcr &= ~DMA_DCR_ERQ;
		dcr |= (DMA_DCR_CS | DMA_DCR_START);
		CFG->DCR = dcr;
	}


	/***************************************/
	/**    Interrupts                     **/
	/***************************************/

	// An interrupt routine can be run when the DMA channel completes
	// the entire transfer.  isr is stored in the RAM vector table
	// entry of this channel's interrupt, which is then enabled.
	void attachInterrupt(void (*isr)(void)) {
		const int irq = IRQ_DMA_CH0 + channel;
		_VectorsRam[irq + 16] = isr;
		NVIC_ENABLE_IRQ(irq);
	}

	// Disable this channel's interrupt.  The vector table entry is
	// left as it is.
	void detachInterrupt(void) {
		const int irq = IRQ_DMA_CH0 + channel;
		NVIC_DISABLE_IRQ(irq);
	}

	// Writes DMA_DSR_BCR_DONE to the status register.
	void clearInterrupt(void) {
		CFG->DSR_BCR = DMA_DSR_BCR_DONE;
	}


	/***************************************/
	/**    Enable / Disable               **/
	/***************************************/

	// Currently does nothing on Teensy-LC.
	void enable(void) {
	}
	// Currently does nothing on Teensy-LC.
	void disable(void) {
	}

	/***************************************/
	/**    Status                         **/
	/***************************************/

	// True when the DONE flag is set in the status register.
	bool complete(void) {
		return (CFG->DSR_BCR & DMA_DSR_BCR_DONE) ? true : false;
	}
	// Writes the status register back with DMA_DSR_BCR_DONE set.
	void clearComplete(void) {
		CFG->DSR_BCR = CFG->DSR_BCR | DMA_DSR_BCR_DONE;
	}
	// True when any of the CE, BES or BED flags is set in the status
	// register.
	bool error(void) {
		const uint32_t error_flags =
			DMA_DSR_BCR_CE | DMA_DSR_BCR_BES | DMA_DSR_BCR_BED;
		return (CFG->DSR_BCR & error_flags) ? true : false;
	}
	// Performs the same register write as clearComplete().
	void clearError(void) {
		CFG->DSR_BCR = CFG->DSR_BCR | DMA_DSR_BCR_DONE;
	}
	// Current value of the source address register.
	void * sourceAddress(void) {
		return (void *)(CFG->SAR);
	}
	// Current value of the destination address register.
	void * destinationAddress(void) {
		return (void *)(CFG->DAR);
	}

	/***************************************/
	/**    Direct Hardware Access         **/
	/***************************************/

	// Number of the hardware channel in use.  begin() stores
	// DMA_NUM_CHANNELS here when no channel could be allocated.
	uint8_t channel;
	// CFG is accessible due to inheritance from DMABaseClass
};

// arrange the relative priority of 2 or more DMA channels
@@ -804,13 +1056,11 @@ void DMAPriorityOrder(DMAChannel &ch1, DMAChannel &ch2, DMAChannel &ch3, DMAChan



extern "C" {
#endif
extern uint16_t dma_channel_allocated_mask;
#ifdef __cplusplus
}
#endif
#endif // KINETISL

#endif // KINETISK

#endif

#endif // __cplusplus


#endif // DMAChannel_h_

+ 4
- 0
teensy3/kinetis.h View File

@@ -526,7 +526,11 @@ enum IRQ_NUMBER_t {
#define SIM_SCGC6_DMAMUX ((uint32_t)0x00000002) // DMA Mux Clock Gate Control
#define SIM_SCGC6_FTFL ((uint32_t)0x00000001) // Flash Memory Clock Gate Control
#define SIM_SCGC7 (*(volatile uint32_t *)0x40048040) // System Clock Gating Control Register 7
#if defined(KINETISK)
#define SIM_SCGC7_DMA ((uint32_t)0x00000002) // DMA Clock Gate Control
#elif defined(KINETISL)
#define SIM_SCGC7_DMA ((uint32_t)0x00000100) // DMA Clock Gate Control
#endif
#define SIM_CLKDIV1 (*(volatile uint32_t *)0x40048044) // System Clock Divider Register 1
#define SIM_CLKDIV1_OUTDIV1(n) ((uint32_t)(((n) & 0x0F) << 28)) // divide value for the core/system clock
#define SIM_CLKDIV1_OUTDIV2(n) ((uint32_t)(((n) & 0x0F) << 24)) // divide value for the peripheral clock

Loading…
Cancel
Save