update for usbd-uac2 0.1.0, dma pipeline

This commit is contained in:
2026-05-18 23:37:54 -07:00
parent 3e726010c7
commit 4e3f1f52ca
4 changed files with 634 additions and 248 deletions
Generated
+6 -9
View File
@@ -181,7 +181,7 @@ dependencies = [
"bare-metal", "bare-metal",
"bitfield 0.13.2", "bitfield 0.13.2",
"critical-section", "critical-section",
"embedded-hal 0.2.7", "embedded-hal",
"volatile-register", "volatile-register",
] ]
@@ -292,12 +292,6 @@ dependencies = [
"void", "void",
] ]
[[package]]
name = "embedded-hal"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
[[package]] [[package]]
name = "embedded-io" name = "embedded-io"
version = "0.7.1" version = "0.7.1"
@@ -365,7 +359,7 @@ dependencies = [
"cortex-m-rt", "cortex-m-rt",
"defmt 1.0.1", "defmt 1.0.1",
"defmt-rtt", "defmt-rtt",
"embedded-hal 1.0.0", "embedded-hal",
"embedded-io", "embedded-io",
"log-to-defmt", "log-to-defmt",
"lpc55-hal", "lpc55-hal",
@@ -491,12 +485,13 @@ dependencies = [
[[package]] [[package]]
name = "lpc55-hal" name = "lpc55-hal"
version = "0.5.0" version = "0.5.0"
source = "git+https://github.com/ktims/lpc55-hal?branch=main#8dfefd62aff4abd2de535f23107812dda68437be"
dependencies = [ dependencies = [
"block-buffer", "block-buffer",
"cipher", "cipher",
"cortex-m", "cortex-m",
"digest", "digest",
"embedded-hal 0.2.7", "embedded-hal",
"embedded-time", "embedded-time",
"generic-array 1.4.1", "generic-array 1.4.1",
"lpc55-pac", "lpc55-pac",
@@ -1084,6 +1079,8 @@ dependencies = [
[[package]] [[package]]
name = "usbd-uac2" name = "usbd-uac2"
version = "0.1.0" version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "117c289dcd316caa7aca9c0909117d8cf9d35f3ed2e7a5739067f0bcedc93e35"
dependencies = [ dependencies = [
"byteorder-embedded-io", "byteorder-embedded-io",
"defmt 1.0.1", "defmt 1.0.1",
+5 -4
View File
@@ -18,17 +18,18 @@ cortex-m = { version = "0.7.7", features = ["critical-section-single-core"] }
cortex-m-rt = "0.7.5" cortex-m-rt = "0.7.5"
defmt = "1.0.1" defmt = "1.0.1"
defmt-rtt = "1.1.0" defmt-rtt = "1.1.0"
embedded-hal = "1.0.0" embedded-hal = "0.2.7"
embedded-io = "0.7.1" embedded-io = "0.7.1"
log-to-defmt = "0.1.0" log-to-defmt = "0.1.0"
lpc55-hal = { version = "0.5.0", path = "../usbd_uac2/examples/lpc55-hal" } # Includes update to usb-device 0.3, fix for isochronous and smaller critical sections
lpc55-hal = { git = "https://github.com/ktims/lpc55-hal", branch = "main" }
nb = "1.1.0" nb = "1.1.0"
panic-halt = "1.0.0" panic-halt = "1.0.0"
panic-probe = { version = "1.0.0", features = ["print-defmt"] } panic-probe = { version = "1.0.0", features = ["print-defmt"] }
static_cell = "2.1.1" static_cell = "2.1.1"
usb-device = "0.3" usb-device = { version = "0.3", features = ["control-buffer-256"] }
usbd-hid = { version = "0.10.0", optional = true } usbd-hid = { version = "0.10.0", optional = true }
usbd-uac2 = { version = "0.1.0", path = "../usbd_uac2", features = ["defmt"]} usbd-uac2 = { version = "0.1.0", features = ["defmt"]}
[profile.release] [profile.release]
opt-level = "z" opt-level = "z"
+427
View File
@@ -0,0 +1,427 @@
use hal::Syscon;
use hal::peripherals::syscon::ClockControl;
use crate::{hal, pac};
use core::cell::UnsafeCell;
use core::convert::Infallible;
use core::ptr::copy_nonoverlapping;
use core::sync::atomic::{AtomicUsize, Ordering, compiler_fence};
/// DMA0 channel index 19. The rest of this module addresses the same channel directly via
/// `channel19` and `1 << 19`.
/// NOTE(review): presumably the channel tied to the FLEXCOMM7 TX request; confirm against the MCU's DMA request table.
pub const DMA0_FLEXCOMM7_TX: u8 = 19;
/// One DMA descriptor, stored with C layout so the field order below is the in-memory order.
///
/// `DmaRing::init_descriptors` fills one of these per slot and links them into a circular chain.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct DmaDescriptor {
/// Transfer configuration word (built by `encode_xfercfg`).
pub xfercfg: u32,
/// Address of the last source word of the transfer (slot start + slot size - word size).
pub src_end: *const u8,
/// Destination address; the ring stores its destination register pointer here.
pub dst_end: *mut u32,
/// Pointer to the descriptor that follows this one in the chain.
pub next: *const DmaDescriptor,
}
/// defmt rendering of a descriptor: every field is printed in hexadecimal.
impl defmt::Format for DmaDescriptor {
    fn format(&self, fmt: defmt::Formatter) {
        // All fields are `Copy`, so pull them out once and log the copies.
        let Self {
            xfercfg,
            src_end,
            dst_end,
            next,
        } = *self;
        defmt::write!(
            fmt,
            "xfercfg={:x} src_end={:x} dst_end={:x} next={:x}",
            xfercfg,
            src_end,
            dst_end,
            next
        )
    }
}
/// Channel descriptor table: 32 entries, indexed by channel number. `DmaRing::init` writes
/// the address of this table to the DMA `srambase` register.
/// NOTE(review): the 512-byte alignment is presumably a hardware requirement of SRAMBASE; confirm in the MCU manual.
#[repr(C, align(512))]
pub struct DescriptorTable {
/// One descriptor per channel index; this module only populates entry 19.
pub d: [DmaDescriptor; 32],
}
/// The circular chain of `N` descriptors (one per slot) that the channel moves on to once the
/// transfer has begun. Entry `i` links to entry `(i + 1) % N`.
#[repr(C)]
pub struct RingDescriptors<const N: usize> {
/// Descriptor `i` describes slot `i`.
pub d: [DmaDescriptor; N],
}
/// Outcome of a `DmaRing::push` call.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct PushResult {
/// Number of input bytes copied into the ring.
pub written: usize,
/// Number of input bytes left over because the ring was full.
pub dropped: usize,
}
/// Configuration errors returned by `DmaRing::new` and `DmaRing::set_slot_size`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ConfigError {
/// Requested slot size exceeds `MAX_SLOT_BYTES`.
SlotTooLarge,
/// Requested slot size is zero.
SlotTooSmall,
/// Requested slot size is not a multiple of the transfer word width.
SlotNotAligned,
/// Transfer word width is not 1, 2 or 4 bytes.
UnsupportedWidth,
}
/// Errors raised by the consumer-side bookkeeping of the DMA ring.
#[derive(Debug)]
pub enum DmaError {
    /// A slot was reported as consumed although no produced slot was outstanding.
    Underrun,
}

impl core::fmt::Display for DmaError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Exhaustive match so a new variant forces a decision about its text.
        let text = match self {
            Self::Underrun => "DmaUnderrun",
        };
        f.write_str(text)
    }
}

impl core::error::Error for DmaError {}
/// Slot-based DMA ring.
///
/// `push` copies incoming bytes into `N` fixed-capacity slots; `init_descriptors` builds one
/// DMA descriptor per slot that moves the slot's contents to `dst_reg`. Progress is tracked
/// with the `produced` / `consumed` slot counters.
pub struct DmaRing<const N: usize, const MAX_SLOT_BYTES: usize> {
/// Owned PAC handle for the DMA0 peripheral.
dma: pac::DMA0,
/// Destination peripheral register (FIFO write register)
dst_reg: *mut u32,
// SAFETY: only written by USB task (on start)
/// Channel descriptor table whose address is programmed into `srambase` by `init`.
pub(crate) channel_desc: UnsafeCell<DescriptorTable>,
// SAFETY: only written by USB task (on start)
/// Circular per-slot descriptor chain.
pub(crate) desc: UnsafeCell<RingDescriptors<N>>,
/// Slot storage; each slot can hold up to `MAX_SLOT_BYTES` bytes.
slots: UnsafeCell<[[u8; MAX_SLOT_BYTES]; N]>,
/// Effective bytes per slot. May be smaller than MAX_SLOT_BYTES (e.g. at lower sample rates), as the setup is designed for constant rate not constant size.
slot_bytes: usize,
/// Width in bytes of one DMA transfer to the FIFO (1, 2 or 4; validated in `new`).
word_bytes: usize,
// SAFETY: producer only
/// Index of the slot the producer is currently filling.
write_slot: UnsafeCell<usize>,
/// Byte offset of the next write inside the current slot.
write_off: UnsafeCell<usize>,
/// Count of slots completely filled and published by `push`.
produced: AtomicUsize,
/// Count of slots reported consumed through `advance_consumed`.
consumed: AtomicUsize,
/// Leave at least one slot empty so producer never overwrites a slot DMA may still read.
safety_gap: usize,
/// Running total of bytes accepted by `push`.
pub produced_bytes: AtomicUsize,
/// Running total of bytes accounted for by `advance_consumed` (whole slots only).
pub consumed_bytes: AtomicUsize,
}
impl<const N: usize, const MAX_SLOT_BYTES: usize> DmaRing<N, MAX_SLOT_BYTES> {
/// Builds a ring from the PAC `DMA0` peripheral, `&mut SYSCON` and a destination FIFO register.
///
/// `word_bytes` is the width of one DMA transfer and must be 1, 2 or 4; anything else is
/// rejected with [`ConfigError::UnsupportedWidth`]. The DMA0 clock is enabled through
/// `syscon`. All descriptors start zeroed, the slot size starts at `MAX_SLOT_BYTES`, and the
/// safety gap starts at one slot.
pub fn new(
    dma: pac::DMA0,
    syscon: &mut Syscon,
    dst_reg: *mut u32,
    word_bytes: usize,
) -> Result<Self, ConfigError> {
    if !matches!(word_bytes, 1 | 2 | 4) {
        return Err(ConfigError::UnsupportedWidth);
    }

    // Start the DMA0 clock
    dma.enable_clock(syscon);

    // Template used to zero-initialise both descriptor arrays.
    let blank = DmaDescriptor {
        xfercfg: 0,
        src_end: core::ptr::null(),
        dst_end: core::ptr::null_mut(),
        next: core::ptr::null(),
    };

    Ok(Self {
        dma,
        dst_reg,
        channel_desc: UnsafeCell::new(DescriptorTable { d: [blank; 32] }),
        desc: UnsafeCell::new(RingDescriptors { d: [blank; N] }),
        slots: UnsafeCell::new([[0u8; MAX_SLOT_BYTES]; N]),
        slot_bytes: MAX_SLOT_BYTES,
        word_bytes,
        write_slot: UnsafeCell::new(0),
        write_off: UnsafeCell::new(0),
        produced: AtomicUsize::new(0),
        consumed: AtomicUsize::new(0),
        safety_gap: 1,
        produced_bytes: AtomicUsize::new(0),
        consumed_bytes: AtomicUsize::new(0),
    })
}
/// Overrides how many slots the producer keeps free (one slot after construction).
/// Requests larger than `N` are clamped to `N`.
pub fn set_safety_gap(&mut self, gap_slots: usize) {
    self.safety_gap = core::cmp::min(gap_slots, N);
}
/// Returns the effective number of bytes used per slot.
pub fn slot_size(&self) -> usize {
self.slot_bytes
}
/// Sets the effective number of bytes used in each slot and resets the producer indices and
/// counters.
///
/// The DMA descriptors are only rebuilt by `init`, so the new size reaches the hardware the
/// next time `init` runs.
///
/// # Errors
/// - [`ConfigError::SlotTooSmall`] if `slot_bytes` is zero.
/// - [`ConfigError::SlotTooLarge`] if `slot_bytes` exceeds `MAX_SLOT_BYTES`, or if the slot
///   would need more transfers than a single descriptor can express (XFERCOUNT is a 10-bit
///   field holding `transfers - 1`, i.e. at most 1024 transfers).
/// - [`ConfigError::SlotNotAligned`] if `slot_bytes` is not a multiple of the word width.
pub fn set_slot_size(&mut self, slot_bytes: usize) -> Result<(), ConfigError> {
    // Largest transfer count that fits the 10-bit XFERCOUNT field (stored as count - 1).
    const MAX_TRANSFERS_PER_SLOT: usize = 0x3FF + 1;

    if slot_bytes == 0 {
        return Err(ConfigError::SlotTooSmall);
    }
    if slot_bytes > MAX_SLOT_BYTES {
        return Err(ConfigError::SlotTooLarge);
    }
    if slot_bytes % self.word_bytes != 0 {
        return Err(ConfigError::SlotNotAligned);
    }
    // Without this check the descriptor encoder would silently truncate the count and the
    // DMA would move only part of each slot.
    if slot_bytes / self.word_bytes > MAX_TRANSFERS_PER_SLOT {
        return Err(ConfigError::SlotTooLarge);
    }
    self.slot_bytes = slot_bytes;
    self.reset_producer();
    Ok(())
}
/// Producer: copy into ring; commits whole slots; reports overflow by returning dropped bytes.
///
/// Bytes are appended to the slot currently being filled. Each time a slot reaches
/// `slot_bytes` it is published by incrementing `produced`, and writing continues in the next
/// slot (modulo `N`). Copying stops as soon as `is_full_for_producer` reports the ring full;
/// whatever input remains is returned as `dropped`. A partially filled slot stays pending
/// until a later call completes it.
///
/// NOTE(review): takes `&self` but mutates producer state through `UnsafeCell`; this relies on
/// there being exactly one producer context — confirm at the call sites.
pub fn push(&self, mut data: &[u8]) -> PushResult {
let mut written = 0usize;
// SAFETY (per the field comments): the write cursor is producer-only state.
let write_slot = unsafe { &mut *self.write_slot.get() };
let write_off = unsafe { &mut *self.write_off.get() };
let slots = unsafe { &mut *self.slots.get() };
defmt::debug!(
"produced={} consumed={} fill={}",
self.produced(),
self.consumed(),
self.fill_slots()
);
while !data.is_empty() {
// Re-checked per iteration because committing a slot below changes the fill level.
if self.is_full_for_producer() {
break;
}
// Copy as much as fits into the remainder of the current slot.
let cap = self.slot_bytes - *write_off;
let n = core::cmp::min(cap, data.len());
unsafe {
let dst = slots[*write_slot].as_mut_ptr().add(*write_off);
copy_nonoverlapping(data.as_ptr(), dst, n);
}
*write_off += n;
written += n;
data = &data[n..];
if *write_off == self.slot_bytes {
// publish completed slot: the fence keeps the copy above ordered before the counter update
compiler_fence(Ordering::Release);
self.produced.fetch_add(1, Ordering::Release);
*write_slot = (*write_slot + 1) % N;
*write_off = 0;
}
}
// Byte statistic includes bytes sitting in a not-yet-published partial slot.
self.produced_bytes.fetch_add(written, Ordering::Release);
PushResult {
written,
dropped: data.len(),
}
}
/// Call from DMA IRQ bookkeeping when `slots` slots have been consumed.
///
/// On success the consumed-slot counter advances by `slots` and the `consumed_bytes`
/// statistic by `slots * slot_bytes`.
///
/// # Errors
/// Logs and returns [`DmaError::Underrun`], leaving all counters untouched, when no produced
/// slot is outstanding or when `slots` exceeds the number of outstanding slots. Rejecting the
/// latter keeps `consumed` from overtaking `produced`, which would make `fill_slots` wrap
/// around and the producer see a permanently full ring.
pub fn advance_consumed(&self, slots: usize) -> Result<(), DmaError> {
    let produced = self.produced.load(Ordering::Acquire);
    let consumed = self.consumed.load(Ordering::Relaxed);
    // Saturating: `consumed >= produced` counts as nothing outstanding.
    let outstanding = produced.saturating_sub(consumed);
    if outstanding == 0 || slots > outstanding {
        defmt::error!("DMA underrun!");
        return Err(DmaError::Underrun);
    }
    self.consumed.fetch_add(slots, Ordering::Release);
    self.consumed_bytes
        .fetch_add(slots * self.slot_bytes, Ordering::Relaxed);
    Ok(())
}
/// Number of slots published by `push` since the counters were last reset.
pub fn produced(&self) -> usize {
self.produced.load(Ordering::Acquire)
}
/// Number of bytes accepted by `push` since the counters were last reset.
pub fn produced_bytes(&self) -> usize {
self.produced_bytes.load(Ordering::Acquire)
}
/// Number of slots reported consumed via `advance_consumed` since the counters were last reset.
pub fn consumed(&self) -> usize {
self.consumed.load(Ordering::Acquire)
}
/// Estimates the total number of bytes consumed so far: whole consumed slots plus the progress
/// inside the slot currently in flight, derived from the channel's live transfer counter.
///
/// The slot counter and the hardware counter are each sampled twice; the result is only
/// computed from a snapshot in which both pairs agree, otherwise the sampling is retried.
/// Note that this reads the `consumed` slot counter and the channel register, not the
/// `consumed_bytes` statistic field.
pub fn consumed_bytes(&self) -> usize {
    const XFERCOUNT_MASK: usize = 0x3ff;
    let read_xfercount =
        || (self.dma.channel19.xfercfg.read().bits() as usize >> 16) & XFERCOUNT_MASK;

    loop {
        let slots_before = self.consumed.load(Ordering::Acquire);
        let first = read_xfercount();
        let second = read_xfercount();
        let slots_after = self.consumed.load(Ordering::Acquire);

        // Something moved while sampling: take a fresh snapshot.
        if slots_before != slots_after || first != second {
            continue;
        }

        // An all-ones counter is treated as "nothing left"; otherwise the remaining amount
        // is (XFERCOUNT + 1) transfers of `word_bytes` each.
        let remaining_bytes = match first {
            XFERCOUNT_MASK => 0,
            count => (count + 1) * self.word_bytes,
        };
        let done_in_active_slot = self.slot_bytes - remaining_bytes;
        return slots_before * self.slot_bytes + done_in_active_slot;
    }
}
/// Number of published slots not yet reported consumed (wrapping difference of the counters).
pub fn fill_slots(&self) -> usize {
    let produced = self.produced();
    let consumed = self.consumed();
    produced.wrapping_sub(consumed)
}
/// Rebuilds the descriptors and programs DMA0 channel 19 so that it is ready to be started
/// with `run`.
///
/// The register writes below are performed in this exact order.
/// NOTE(review): the required ordering is hardware-defined; verify against the DMA chapter of the MCU manual before reordering.
pub fn init(&self) {
// Zero the slots, rebuild the ring and the channel-table entry, reset the counters.
self.init_descriptors();
// Descriptor table base
let desc = unsafe { &*self.desc.get() };
let base = self.channel_desc.get() as u32;
self.dma.srambase.write(|w| unsafe { w.bits(base) });
// Channel config: peripheral request enabled, hardware trigger disabled.
self.dma
.channel19
.cfg
.write(|w| w.periphreqen().enabled().hwtrigen().disabled());
// Load the live transfer configuration from ring descriptor 0.
self.dma
.channel19
.xfercfg
.write(|w| unsafe { w.bits(desc.d[0].xfercfg) });
// Keep channel 19 disabled for now; `run` sets the enable bit.
self.dma.enableclr0.write(|w| unsafe { w.bits(1 << 19) });
// Enable the DMA controller itself.
self.dma.ctrl.write(|w| w.enable().enabled());
// Bit 19 in each of SETVALID0, INTENSET0 and SETTRIG0.
self.dma.setvalid0.write(|w| unsafe { w.bits(1 << 19) });
self.dma.intenset0.write(|w| unsafe { w.bits(1 << 19) });
self.dma.settrig0.write(|w| unsafe { w.bits(1 << 19) });
}
/// Starts the channel by setting bit 19 in `ENABLESET0`. `init` is the method that prepares
/// the channel beforehand.
pub fn run(&self) {
self.dma.enableset0.write(|w| unsafe { w.bits(1 << 19) });
}
/// Stops channel 19: clears its enable bit, busy-waits until its bit in `BUSY0` reads zero,
/// writes its bit to `ABORT0`, then resets the producer indices and all counters.
pub fn stop(&self) {
self.dma.enableclr0.write(|w| unsafe { w.bits(1 << 19) });
// Spin until the channel is no longer busy (`<<` binds tighter than `&`, so this tests bit 19).
nb::block!(if (self.dma.busy0.read().bits() & 1 << 19) == 0 {
Ok(())
} else {
Err(nb::Error::<Infallible>::WouldBlock)
});
self.dma.abort0.write(|w| unsafe { w.bits(1 << 19) });
self.reset_producer();
}
/// Rewinds the write cursor to the start of slot 0 and zeroes the slot and byte counters.
fn reset_producer(&self) {
    // Write cursor lives in `UnsafeCell`s (producer-only state per the field comments).
    unsafe {
        *self.write_slot.get() = 0;
        *self.write_off.get() = 0;
    }
    self.produced.store(0, Ordering::Relaxed);
    self.produced_bytes.store(0, Ordering::Relaxed);
    self.consumed.store(0, Ordering::Relaxed);
    self.consumed_bytes.store(0, Ordering::Relaxed);
}
/// True once the number of published-but-unconsumed slots has reached `N - safety_gap`.
fn is_full_for_producer(&self) -> bool {
    let occupied = self.fill_slots();
    let limit = N.wrapping_sub(self.safety_gap);
    occupied >= limit
}
/// Init-time reset: rewinds the write cursor and zeroes every counter. Performs the same
/// resets as `reset_producer`; used by `init_descriptors`.
fn reset_producer_init_only(&self) {
    unsafe {
        *self.write_slot.get() = 0;
        *self.write_off.get() = 0;
    }
    self.produced.store(0, Ordering::Relaxed);
    self.consumed.store(0, Ordering::Relaxed);
    self.produced_bytes.store(0, Ordering::Relaxed);
    self.consumed_bytes.store(0, Ordering::Relaxed);
}
/// Rebuilds the descriptor ring for the current slot size.
///
/// Zeroes the active part of every slot, writes one descriptor per slot (each linking to the
/// next, the last back to the first), copies descriptor 0 into channel-table entry 19 with
/// its `xfercfg` cleared, and finally resets the producer indices and counters.
fn init_descriptors(&self) {
    let slots = unsafe { &mut *self.slots.get() };
    let ring = unsafe { &mut *self.desc.get() };
    let table = unsafe { &mut *self.channel_desc.get() };
    defmt::debug!("slots base: &{:x}", self.slots.get());

    // Pre-fill with silence so underrun replays silence.
    for slot in slots.iter_mut() {
        slot[..self.slot_bytes].fill(0);
    }

    // Every descriptor carries the same transfer configuration, so encode it once.
    let transfer_count = (self.slot_bytes / self.word_bytes) as u32;
    let xfercfg = encode_xfercfg(
        true,  // valid
        true,  // reload
        false, // swtrig (we use XFERCFG SWTRIG kick)
        false, // clrtrig
        true,  // intA
        false, // intB
        self.word_bytes as u32,
        1, // src_inc
        0, // dst_inc
        transfer_count,
    );

    for (index, slot) in slots.iter().enumerate() {
        // The descriptor stores the address of the slot's last word, not its first.
        let last_word = slot.as_ptr() as usize + self.slot_bytes - self.word_bytes;
        let next = &ring.d[(index + 1) % N] as *const DmaDescriptor;
        ring.d[index] = DmaDescriptor {
            xfercfg,
            src_end: last_word as *const u8,
            dst_end: self.dst_reg,
            next,
        };
    }

    // Channel-table entry mirrors descriptor 0, but with a cleared configuration word.
    table.d[19] = ring.d[0];
    table.d[19].xfercfg = 0;

    // reset producer indices + counters (init-only action)
    self.reset_producer_init_only();
}
}
// SAFETY: asserted by the author, not checked by the compiler. The struct holds `UnsafeCell`s
// and a raw pointer; per the field comments, the write cursor is producer-only and the
// descriptors are only written by the USB task on start.
// NOTE(review): soundness depends on callers keeping to that single-producer discipline — confirm.
unsafe impl<const N: usize, const MAX_SLOT_BYTES: usize> Sync for DmaRing<N, MAX_SLOT_BYTES> {}
/// Packs the individual fields of a DMA channel `XFERCFG` word.
///
/// Bit layout produced by this function:
/// - CFGVALID at bit 0, RELOAD at bit 1, SWTRIG at bit 2, CLRTRIG at bit 3
/// - SETINTA at bit 4, SETINTB at bit 5
/// - WIDTH at bits 9:8 (0 = 1 byte, 1 = 2 bytes, 2 = 4 bytes)
/// - SRCINC at bits 13:12
/// - DSTINC at bits 15:14
/// - XFERCOUNT at bits 25:16, stored as `transfers - 1`
///
/// NOTE(review): verify the layout against the XFERCFG register description in the MCU user manual.
///
/// Out-of-range inputs are handled as follows:
/// - a `width_bytes` other than 1, 2 or 4 falls back to the 1-byte encoding;
/// - `src_inc` / `dst_inc` are masked to two bits;
/// - `transfers == 0` is encoded like a single transfer;
/// - `transfers` above 1024 saturates at the field maximum (1024 transfers) rather than
///   wrapping around to a small count.
fn encode_xfercfg(
    cfgvalid: bool,
    reload: bool,
    swtrig: bool,
    clrtrig: bool,
    inta: bool,
    intb: bool,
    width_bytes: u32,
    src_inc: u32,
    dst_inc: u32,
    transfers: u32,
) -> u32 {
    // XFERCOUNT is a 10-bit field.
    const XFERCOUNT_MAX: u32 = 0x3FF;

    let width_code: u32 = match width_bytes {
        1 => 0,
        2 => 1,
        4 => 2,
        _ => 0,
    };
    // Clamp instead of masking: masking turned e.g. 1025 transfers into a single transfer.
    let count_field = transfers.saturating_sub(1).min(XFERCOUNT_MAX);

    (cfgvalid as u32)
        | ((reload as u32) << 1)
        | ((swtrig as u32) << 2)
        | ((clrtrig as u32) << 3)
        | ((inta as u32) << 4)
        | ((intb as u32) << 5)
        | (width_code << 8)
        | ((src_inc & 0x3) << 12)
        | ((dst_inc & 0x3) << 14)
        | (count_field << 16)
}
+195 -234
View File
@@ -8,12 +8,8 @@ fn panic() -> ! {
} }
use atomic::Atomic; use atomic::Atomic;
use bbqueue::nicknames::Churrasco;
use bbqueue::prod_cons::stream::{StreamConsumer, StreamProducer};
use bbqueue::traits::bbqhdl::BbqHandle;
use bbqueue::traits::coordination::ReadGrantError;
use bytemuck::NoUninit; use bytemuck::NoUninit;
use core::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, AtomicUsize, Ordering}; use core::sync::atomic::{AtomicBool, AtomicI32, AtomicUsize, Ordering};
use cortex_m_rt::entry; use cortex_m_rt::entry;
use defmt; use defmt;
use defmt::debug; use defmt::debug;
@@ -26,24 +22,23 @@ use hal::raw as pac;
use hal::time::{Hertz, Microseconds}; use hal::time::{Hertz, Microseconds};
use hal::typestates::pin::state::Gpio; use hal::typestates::pin::state::Gpio;
use lpc55_hal as hal; use lpc55_hal as hal;
use lpc55_hal::raw::NVIC;
use lpc55_hal::raw::sdif::FIFO;
use pac::interrupt; use pac::interrupt;
use static_cell::StaticCell;
use usb_device::{ use usb_device::{
bus::{self}, bus::{self},
device::{StringDescriptors, UsbDeviceBuilder, UsbVidPid}, device::{StringDescriptors, UsbVidPid},
endpoint::IsochronousSynchronizationType,
}; };
#[cfg(feature = "hid")] #[cfg(feature = "hid")]
use usbd_hid::{descriptor::SerializedDescriptor, hid_class::HIDClass}; use usbd_hid::{descriptor::SerializedDescriptor, hid_class::HIDClass};
use usbd_uac2::UsbIsochronousFeedback;
use usbd_uac2::{ use usbd_uac2::{
self, AudioClassConfig, RangeEntry, TerminalConfig, UsbAudioClass, UsbAudioClockImpl, UsbSpeed, self, AudioHandler, ClockSource, RangeEntry, TerminalConfig, UsbAudioClassConfig,
UsbAudioClassError, UsbIsochronousFeedback, UsbSpeed,
constants::{FunctionCode, TerminalType}, constants::{FunctionCode, TerminalType},
descriptors::{ChannelConfig, ClockType, FormatType1, LockDelay}, descriptors::ClockType,
}; };
use crate::dac::DacImpl; use crate::dac::DacImpl;
use crate::dma::DmaRing;
use crate::hid::AudioTelemetryReport; use crate::hid::AudioTelemetryReport;
use crate::traits::Dac; use crate::traits::Dac;
@@ -63,20 +58,26 @@ pub mod dac {
pub use self::noop::NoopDac as DacImpl; pub use self::noop::NoopDac as DacImpl;
} }
mod dma;
#[cfg(feature = "hid")] #[cfg(feature = "hid")]
mod hid; mod hid;
mod hw; mod hw;
mod traits; mod traits;
// Fo = M/(N*2*P) * Fin const BYTES_PER_SAMPLE: usize = 4; // 32 bit samples
// Fo = 3072/(125*2*8) * 16MHz = 24.576MHz const BYTES_PER_FRAME: usize = BYTES_PER_SAMPLE * 2; // 2 channels
// const FRAMES_PER_SLOT: usize = SAMPLE_RATE as usize / 2000; // run the DMA at 2khz
const FIFO_LENGTH: usize = 256; // frames const BYTES_PER_SLOT: usize = FRAMES_PER_SLOT * BYTES_PER_FRAME;
const QUEUE_RUNNING_UP: usize = (FIFO_LENGTH * 4) / 10; // 40% const N_SLOTS: usize = 8;
const QUEUE_RUNNING_DOWN: usize = (FIFO_LENGTH * 2) / 10; // 20% const FILL_TARGET_BYTES: i32 = (BYTES_PER_SLOT * N_SLOTS) as i32 / 2;
const USB_FRAME_RATE: u32 = 8000; // microframe rate: 8000 for HS, 1000 for FS
// In frames
const QUEUE_RUNNING_UP: usize = ((FRAMES_PER_SLOT * N_SLOTS) * 4) / 10; // 40%
const QUEUE_RUNNING_DOWN: usize = ((FRAMES_PER_SLOT * N_SLOTS) * 2) / 10; // 20%
const NODATA_TIMEOUT_FRAMES: usize = SAMPLE_RATE as usize / 100; // ~100ms const NODATA_TIMEOUT_FRAMES: usize = SAMPLE_RATE as usize / 100; // ~100ms
const MCLK_FREQ: u32 = 24576000; const MCLK_FREQ: u32 = 24576000;
const SAMPLE_RATE: u32 = 88200; const SAMPLE_RATE: u32 = 192000;
const HID_INTERVAL_MS: u8 = 100; const HID_INTERVAL_MS: u8 = 100;
struct CodecPins { struct CodecPins {
@@ -88,63 +89,6 @@ struct ClockSelPins {
sel_22m: Pin<pins::Pio0_31, Gpio<Output>>, sel_22m: Pin<pins::Pio0_31, Gpio<Output>>,
} }
struct Clock {
pins: ClockSelPins,
cur_rate: u32,
}
impl Clock {
const RATES: [RangeEntry<u32>; 1] = [RangeEntry::new_fixed(SAMPLE_RATE)];
}
impl UsbAudioClockImpl for Clock {
const CLOCK_TYPE: usbd_uac2::descriptors::ClockType = ClockType::InternalFixed;
const SOF_SYNC: bool = false;
fn get_sample_rate(&self) -> u32 {
self.cur_rate
}
fn set_sample_rate(
&mut self,
sample_rate: u32,
) -> core::result::Result<(), usbd_uac2::UsbAudioClassError> {
if 24_576_000u32.is_multiple_of(sample_rate) {
defmt::info!("[clock] 24M clock selected");
self.pins.sel_22m.set_low().ok();
// hal::wait_at_least(1);
self.pins.sel_24m.set_high().ok();
} else {
defmt::info!("[clock] 22M clock selected");
self.pins.sel_24m.set_low().ok();
// hal::wait_at_least(1);
self.pins.sel_22m.set_high().ok();
};
self.cur_rate = sample_rate;
Ok(())
}
fn get_rates(
&self,
) -> core::result::Result<&[usbd_uac2::RangeEntry<u32>], usbd_uac2::UsbAudioClassError> {
Ok(&Clock::RATES)
}
fn get_clock_validity(&self) -> core::result::Result<bool, usbd_uac2::UsbAudioClassError> {
Ok(true)
}
fn alt_setting(
&mut self,
alt_setting: u8,
) -> core::result::Result<(), usbd_uac2::UsbAudioClassError> {
match alt_setting {
0 => {
self.pins.sel_22m.set_low().ok();
self.pins.sel_24m.set_low().ok();
}
1 => {
self.set_sample_rate(self.cur_rate).ok();
}
_ => {}
};
Ok(())
}
}
#[derive(Default)] #[derive(Default)]
struct PerfCounters { struct PerfCounters {
received_frames: AtomicUsize, received_frames: AtomicUsize,
@@ -160,8 +104,10 @@ impl PerfCounters {
fn reset(&self) { fn reset(&self) {
self.received_frames.store(0, Ordering::Relaxed); self.received_frames.store(0, Ordering::Relaxed);
self.played_frames.store(0, Ordering::Relaxed); self.played_frames.store(0, Ordering::Relaxed);
self.min_fill.store(FIFO_LENGTH, Ordering::Relaxed); self.min_fill
self.avg_fill.store(FIFO_LENGTH / 2, Ordering::Relaxed); .store(N_SLOTS * BYTES_PER_SLOT, Ordering::Relaxed);
self.avg_fill
.store(FILL_TARGET_BYTES as usize, Ordering::Relaxed);
self.queue_underflows.store(0, Ordering::Relaxed); self.queue_underflows.store(0, Ordering::Relaxed);
self.queue_overflows.store(0, Ordering::Relaxed); self.queue_overflows.store(0, Ordering::Relaxed);
self.audio_underflows.store(0, Ordering::Relaxed); self.audio_underflows.store(0, Ordering::Relaxed);
@@ -193,91 +139,69 @@ impl defmt::Format for PerfCounters {
} }
} }
const BYTES_PER_FRAME: usize = 8;
const QUEUE_BYTES: usize = FIFO_LENGTH * BYTES_PER_FRAME;
// We use bbqueue here for performance in the USB driver that runs almost entirely in interrupt free critical section.
static QUEUE: Churrasco<QUEUE_BYTES> = Churrasco::new();
// Used for feedback calculation of current fifo state
static PRODUCED: AtomicU32 = AtomicU32::new(0);
static CONSUMED: AtomicU32 = AtomicU32::new(0);
static PERF: PerfCounters = PerfCounters { static PERF: PerfCounters = PerfCounters {
received_frames: AtomicUsize::new(0), // received from USB received_frames: AtomicUsize::new(0), // received from USB
played_frames: AtomicUsize::new(0), // played audio frames played_frames: AtomicUsize::new(0), // played audio frames
min_fill: AtomicUsize::new(FIFO_LENGTH), // not recording this for now, need to figure out how to make it meaningful, since the queue starts empty min_fill: AtomicUsize::new(0), // not recording this for now, need to figure out how to make it meaningful, since the queue starts empty
avg_fill: AtomicUsize::new(FIFO_LENGTH / 2), avg_fill: AtomicUsize::new(FILL_TARGET_BYTES as usize),
queue_underflows: AtomicUsize::new(0), // ditto here, since we underflow at startup, but we record this one as it can be trended queue_underflows: AtomicUsize::new(0), // ditto here, since we underflow at startup, but we record this one as it can be trended
queue_overflows: AtomicUsize::new(0), queue_overflows: AtomicUsize::new(0),
audio_underflows: AtomicUsize::new(0), audio_underflows: AtomicUsize::new(0),
}; };
fn cur_fill() -> u32 { static DMA_RING: StaticCell<DmaRing<N_SLOTS, BYTES_PER_SLOT>> = StaticCell::new();
PRODUCED static mut DMA_RING_REF: Option<&'static DmaRing<N_SLOTS, BYTES_PER_SLOT>> = None;
.load(Ordering::Acquire)
.wrapping_sub(CONSUMED.load(Ordering::Acquire))
/ BYTES_PER_FRAME as u32
}
#[inline] #[inline]
fn try_write_one_frame<T: BbqHandle>( fn dma_ring() -> &'static DmaRing<N_SLOTS, BYTES_PER_SLOT> {
cons: &mut StreamConsumer<T>, unsafe { DMA_RING_REF.unwrap() }
i2s: &pac::i2s7::RegisterBlock, }
) -> bool {
match cons.read() {
Ok(rgr) => {
// TODO: Fix this to handle the case where frame lands on a ring buffer boundary (if it is possible)
if rgr.len() >= BYTES_PER_FRAME {
let l = u32::from_le_bytes(rgr[0..4].try_into().unwrap());
let r = u32::from_le_bytes(rgr[4..8].try_into().unwrap());
i2s.fifowr.write(|w| unsafe { w.bits(l) }); fn cur_fill() -> usize {
i2s.fifowr.write(|w| unsafe { w.bits(r) }); let produced_bytes = dma_ring().produced_bytes() as u32;
let consumed_bytes = dma_ring().consumed_bytes() as u32;
// consume exactly one frame (8 bytes) // Handle rollover properly
rgr.release(BYTES_PER_FRAME); produced_bytes.wrapping_sub(consumed_bytes) as usize
PERF.played_frames.fetch_add(1, Ordering::Relaxed);
CONSUMED.fetch_add(BYTES_PER_FRAME as u32, Ordering::Relaxed);
return true;
} else {
// Not enough bytes for a full frame: leave it in the queue.
return false;
}
}
Err(ReadGrantError::Empty) => {
return false;
}
Err(e) => {
defmt::error!("Unexpected queue read error")
}
}
false
} }
#[interrupt] #[interrupt]
fn FLEXCOMM7() { fn DMA0() {
let i2s = unsafe { &*pac::I2S7::ptr() }; defmt::debug!("dma0");
defmt::info!("isr"); let dma = unsafe { &*pac::DMA0::ptr() };
if i2s.fifostat.read().txlvl().bits() == 0 { let inta = dma.inta0.read().bits();
// ISR was not serviced before the FIFO drained let err = dma.errint0.read().bits();
PERF.audio_underflows.fetch_add(1, Ordering::Relaxed);
// TODO: figure out how to track underflows properly
if (err & (1 << 19)) != 0 {
let live = dma.channel19.xfercfg.read().bits();
let desc = unsafe { &*dma_ring().channel_desc.get() };
let mem = desc.d[19];
defmt::error!(
"DMA error ch19: live={=u32:08x} INTA={=u32:x} ERR={=u32:x}\n desc: {}",
live,
inta,
err,
mem
);
// red_led().on();
dma.errint0.write(|w| unsafe { w.bits(1 << 19) });
} }
// refil the buffer to 4 frames / 8 samples if (inta & (1 << 19)) != 0 {
let mut cons = QUEUE.stream_consumer(); dma.inta0.write(|w| unsafe { w.bits(1 << 19) });
while i2s.fifostat.read().txlvl().bits() <= 6 { if dma_ring().advance_consumed(1).is_err() {
if !try_write_one_frame(&mut cons, i2s) { // red_led().on();
// No complete frame available: write silence to keep FIFO above threshold or we will } else {
// get stuck in the ISR. PERF.played_frames
PERF.queue_underflows.fetch_add(1, Ordering::Relaxed); .fetch_add(FRAMES_PER_SLOT, Ordering::Relaxed);
i2s.fifowr.write(|w| unsafe { w.bits(0) });
i2s.fifowr.write(|w| unsafe { w.bits(0) });
break;
} }
} }
} }
#[repr(u8)] #[repr(u8)]
#[derive(Clone, Copy, NoUninit)] #[derive(Clone, Copy, NoUninit, Eq, PartialEq)]
enum AudioState { enum AudioState {
/// Knowingly stopped, ie. AltSetting=0. DAC muted, I2S disabled. /// Knowingly stopped, ie. AltSetting=0. DAC muted, I2S disabled.
/// ///
@@ -343,7 +267,7 @@ impl FeedbackState {
self.correction_enabled.store(false, Ordering::Relaxed); self.correction_enabled.store(false, Ordering::Relaxed);
self.integrator.store(0, Ordering::Relaxed); self.integrator.store(0, Ordering::Relaxed);
self.filtered_fill self.filtered_fill
.store(FIFO_LENGTH as i32 / 2, Ordering::Relaxed); .store(FILL_TARGET_BYTES, Ordering::Relaxed);
} }
} }
impl Default for FeedbackState { impl Default for FeedbackState {
@@ -351,22 +275,25 @@ impl Default for FeedbackState {
Self { Self {
correction_enabled: AtomicBool::new(false), correction_enabled: AtomicBool::new(false),
integrator: AtomicI32::new(0), integrator: AtomicI32::new(0),
filtered_fill: AtomicI32::new(FIFO_LENGTH as i32 / 2), filtered_fill: AtomicI32::new(FILL_TARGET_BYTES),
} }
} }
} }
struct Audio<T: BbqHandle, D: Dac<I>, I> { struct Audio<'a, D: Dac<I>, I> {
state: Atomic<AudioState>, state: Atomic<AudioState>,
alt_setting: u8, alt_setting: u8,
i2s: I2sTx, i2s: I2sTx,
dac: D, dac: D,
producer: StreamProducer<T>, dma: &'a DmaRing<N_SLOTS, BYTES_PER_SLOT>,
fb: FeedbackState, fb: FeedbackState,
nodata_timeout_frame: AtomicUsize, nodata_timeout_frame: AtomicUsize,
cur_rate: u32,
clock_pins: ClockSelPins,
_marker: core::marker::PhantomData<I>, _marker: core::marker::PhantomData<I>,
} }
impl<T: BbqHandle, D: Dac<I>, I> Audio<T, D, I> { impl<D: Dac<I>, I> Audio<'_, D, I> {
const RATES: [RangeEntry<u32>; 1] = [RangeEntry::new_fixed(SAMPLE_RATE)];
/// Perform a state transition to `state` /// Perform a state transition to `state`
fn transition(&mut self, state: AudioState) { fn transition(&mut self, state: AudioState) {
defmt::info!( defmt::info!(
@@ -425,14 +352,13 @@ impl<T: BbqHandle, D: Dac<I>, I> Audio<T, D, I> {
.normal() .normal()
}); });
self.dac.init(); self.dac.init();
self.dac.change_rate(SAMPLE_RATE);
} }
///Transition -> Stopped: ///Transition -> Stopped:
///clear queue, mute DAC, mask I2S ISR, stop I2S peripheral, disable & reset feedback and performance queues ///clear queue, mute DAC, mask I2S ISR, stop I2S peripheral, disable & reset feedback and performance queues
fn stop(&mut self) { fn stop(&mut self) {
dma_ring().stop();
pac::NVIC::mask(pac::Interrupt::DMA0);
self.dac.mute(); self.dac.mute();
// Disable level interrupt on I2S
self.i2s.i2s.fifointenclr.write(|w| w.txlvl().set_bit());
// Clear any samples in the FIFO // Clear any samples in the FIFO
self.i2s.i2s.fifocfg.modify(|_, w| w.emptytx().set_bit()); self.i2s.i2s.fifocfg.modify(|_, w| w.emptytx().set_bit());
// Disable I2S // Disable I2S
@@ -441,17 +367,17 @@ impl<T: BbqHandle, D: Dac<I>, I> Audio<T, D, I> {
self.fb.reset(); self.fb.reset();
// reset performance counters // reset performance counters
PERF.reset(); PERF.reset();
// Drain anything left in the queue // Stop the clocks
while let Ok(d) = QUEUE.stream_consumer().read() { self.clock_pins.sel_22m.set_low().ok();
let len = d.len(); self.clock_pins.sel_24m.set_low().ok();
d.release(len)
}
} }
///Transition -> Armed ///Transition -> Armed
/// Start I2S peripheral. Since we assume we have interrupts disabled at /// Start I2S peripheral and MCLK. Since we assume we have interrupts disabled at
/// this point (as we came from Stopped), and the FIFO is empty, this will /// this point (as we came from Stopped), and the FIFO is empty, this will
/// play out 0s. /// play out 0s.
fn arm(&mut self) { fn arm(&mut self) {
dma_ring().init();
self.set_sample_rate(self.cur_rate).ok();
self.i2s.i2s.cfg1.modify(|_, w| w.mainenable().enabled()); self.i2s.i2s.cfg1.modify(|_, w| w.mainenable().enabled());
} }
///Transition -> Prefill ///Transition -> Prefill
@@ -468,8 +394,14 @@ impl<T: BbqHandle, D: Dac<I>, I> Audio<T, D, I> {
.i2s .i2s
.fifotrig .fifotrig
.modify(|_, w| unsafe { w.txlvl().bits(6).txlvlena().enabled() }); .modify(|_, w| unsafe { w.txlvl().bits(6).txlvlena().enabled() });
// FIFO level interrupt enable self.i2s
self.i2s.i2s.fifointenset.modify(|_, w| w.txlvl().enabled()); .i2s
.fifocfg
.modify(|_, w| w.enabletx().enabled().dmatx().enabled());
dma_ring().run();
unsafe {
pac::NVIC::unmask(pac::Interrupt::DMA0);
}
} }
///Transition->NoData ///Transition->NoData
///store framecount at transition so we can time out recovery ///store framecount at transition so we can time out recovery
@@ -480,14 +412,49 @@ impl<T: BbqHandle, D: Dac<I>, I> Audio<T, D, I> {
); );
} }
} }
impl<T: BbqHandle, D: Dac<I>, I, B: bus::UsbBus> UsbAudioClass<'_, B> for Audio<T, D, I> { impl<D: Dac<I>, I> ClockSource for Audio<'_, D, I> {
const CLOCK_TYPE: usbd_uac2::descriptors::ClockType = ClockType::InternalFixed;
const SOF_SYNC: bool = false;
fn sample_rate(&self) -> u32 {
self.cur_rate
}
fn set_sample_rate(
&mut self,
sample_rate: u32,
) -> core::result::Result<(), usbd_uac2::UsbAudioClassError> {
if 24_576_000u32.is_multiple_of(sample_rate) {
defmt::info!("[clock] 24M clock selected");
self.clock_pins.sel_22m.set_low().ok();
// hal::wait_at_least(1);
self.clock_pins.sel_24m.set_high().ok();
} else {
defmt::info!("[clock] 22M clock selected");
self.clock_pins.sel_24m.set_low().ok();
// hal::wait_at_least(1);
self.clock_pins.sel_22m.set_high().ok();
};
self.dac.change_rate(sample_rate);
self.cur_rate = sample_rate;
Ok(())
}
fn sample_rates(
&self,
) -> core::result::Result<&[usbd_uac2::RangeEntry<u32>], usbd_uac2::UsbAudioClassError> {
Ok(&Self::RATES)
}
fn clock_validity(&self) -> Result<bool, UsbAudioClassError> {
Ok(true)
}
}
impl<D: Dac<I>, I, B: bus::UsbBus> AudioHandler<'_, B> for Audio<'_, D, I> {
fn alternate_setting_changed(&mut self, _terminal: usb_device::UsbDirection, alt_setting: u8) { fn alternate_setting_changed(&mut self, _terminal: usb_device::UsbDirection, alt_setting: u8) {
let state = self.state.load(Ordering::Relaxed); let state = self.state.load(Ordering::Relaxed);
match (alt_setting, state) { match (alt_setting, state) {
(0, AudioState::Armed | AudioState::Prefill | AudioState::NoData) => { (0, AudioState::Armed | AudioState::Prefill | AudioState::NoData) => {
self.transition(AudioState::Stopped) self.transition(AudioState::Stopped)
} }
(0, AudioState::Running) => {} // noop, we naturally transition through LowData to Stopped (0, AudioState::Running | AudioState::Stopped) => {} // noop, we naturally transition through LowData to Stopped
(1, AudioState::Stopped) => self.transition(AudioState::Armed), (1, AudioState::Stopped) => self.transition(AudioState::Armed),
(1, _) => {} // altSetting 1 in any other state is a no-op (1, _) => {} // altSetting 1 in any other state is a no-op
(_, _) => { (_, _) => {
@@ -501,27 +468,32 @@ impl<T: BbqHandle, D: Dac<I>, I, B: bus::UsbBus> UsbAudioClass<'_, B> for Audio<
ep: &usb_device::endpoint::Endpoint<'_, B, usb_device::endpoint::Out>, ep: &usb_device::endpoint::Endpoint<'_, B, usb_device::endpoint::Out>,
) { ) {
let state = self.state.load(Ordering::Relaxed); let state = self.state.load(Ordering::Relaxed);
let mut buf = [0; SAMPLE_RATE as usize / 1000 * 64]; let mut buf = [0; (SAMPLE_RATE.div_ceil(USB_FRAME_RATE) + 1) as usize * BYTES_PER_FRAME];
let len = match ep.read(&mut buf) { let len = match ep.read(&mut buf) {
Ok(len) => len, Ok(len) => len,
Err(e) => { Err(_) => {
defmt::error!("usb error in rx callback {:?}", e); defmt::error!("usb error in rx callback");
return; return;
} }
}; };
let buf = &buf[..len]; let buf = &buf[..len];
let res = self.dma.push(buf);
if let Ok(mut wg) = self.producer.grant_exact(buf.len()) { if res.dropped != 0 {
wg.copy_from_slice(buf); // Overflow: some or all bytes couldn't be queued.
wg.commit(buf.len()); defmt::error!(
PRODUCED.fetch_add(buf.len() as u32, Ordering::Relaxed); "overflowed dma ring, asked {}, wrote {}, dropped {}",
PERF.received_frames buf.len(),
.fetch_add(buf.len() / BYTES_PER_FRAME, Ordering::Relaxed); res.written,
} else { res.dropped
PERF.queue_overflows.fetch_add(1, Ordering::Relaxed); );
// defmt::error!("overflowed bbq, asked {}", buf.len()); PERF.queue_overflows
.fetch_add(res.dropped / BYTES_PER_FRAME, Ordering::Relaxed);
} }
PERF.received_frames
.fetch_add(res.written / BYTES_PER_FRAME, Ordering::Relaxed);
// Valid states here are Armed, Prefill, Running, Draining and NoData // Valid states here are Armed, Prefill, Running, Draining and NoData
match state { match state {
AudioState::Stopped => { AudioState::Stopped => {
@@ -553,47 +525,44 @@ impl<T: BbqHandle, D: Dac<I>, I, B: bus::UsbBus> UsbAudioClass<'_, B> for Audio<
AudioState::NoData => self.transition(AudioState::LowData), AudioState::NoData => self.transition(AudioState::LowData),
} }
} }
fn feedback(&mut self, nominal_rate: UsbIsochronousFeedback) -> Option<UsbIsochronousFeedback> { fn audio_data_tx(
let target = FIFO_LENGTH as i32 / 2 - nominal_rate.int as i32; &mut self,
_ep: &usb_device::endpoint::Endpoint<'_, B, usb_device::endpoint::In>,
let fill = cur_fill() as i32; ) {
let prev = self.fb.filtered_fill.load(Ordering::Relaxed); }
let filtered = prev + ((fill - prev) >> 4); // ~1/16 smoothing fn feedback(&mut self, nominal_rate: UsbIsochronousFeedback) -> Option<UsbIsochronousFeedback> {
self.fb.filtered_fill.store(filtered, Ordering::Relaxed); if !self.fb.correction_enabled.load(Ordering::Relaxed) {
return Some(nominal_rate);
let error = filtered - target;
// Clamp startup excursions.
let error = error.clamp(-(nominal_rate.int as i32 * 4), nominal_rate.int as i32 * 4);
// Reset integrator when the error is small
if error.abs() < 2 {
self.fb.integrator.store(0, Ordering::Relaxed);
} }
let mut integrator = self.fb.integrator.load(Ordering::Relaxed);
integrator = integrator - (integrator >> 6); // ~1/64 leak, reduce windup
integrator = integrator.clamp(-256, 256); let current_bytes = cur_fill() as i32;
self.fb.integrator.store(integrator, Ordering::Relaxed);
// gains if current_bytes == 0 {
let p = error << 3; defmt::error!("[fb] dma underrun detected!");
let i = integrator << 2; PERF.queue_underflows.fetch_add(1, Ordering::Relaxed);
return Some(nominal_rate);
}
PERF.avg_fill
.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| {
Some(((v << 6) - v + current_bytes as usize) >> 6)
})
.ok();
// normalize error wrt. frame size etc.
let error_permille = ((current_bytes - FILL_TARGET_BYTES) * 1000) / FILL_TARGET_BYTES;
let correction = -((p + i) >> 2);
let nominal_v = nominal_rate.to_u32_12_13() as i32; let nominal_v = nominal_rate.to_u32_12_13() as i32;
let mut v = nominal_v + correction; // 0.2% which is a huge clock error
let max_allowed_deviation = nominal_v / 500;
// Tight clamp around nominal. let p_term = -(error_permille * nominal_v) / 256000; // this works reasonably well to keep the buffer
v = v.clamp(nominal_v - (1 << 12), nominal_v + (1 << 12)); let i_term = 0; // placeholder
defmt::debug!( let mut v = nominal_v + p_term + i_term;
"fill:{} err:{} int:{} fb:{=i32:x}", v = v.clamp(
fill, nominal_v - max_allowed_deviation,
error, nominal_v + max_allowed_deviation,
integrator,
v
); );
Some(UsbIsochronousFeedback::new(v as u32)) Some(UsbIsochronousFeedback::new(v as u32))
@@ -640,8 +609,6 @@ pub fn init_i2s(mut fc7: pac::FLEXCOMM7, i2s7: pac::I2S7, syscon: &mut Syscon) -
// Select I2S TX function // Select I2S TX function
fc7.pselid.write(|w| w.persel().i2s_transmit()); fc7.pselid.write(|w| w.persel().i2s_transmit());
unsafe { NVIC::unmask(interrupt::FLEXCOMM7) }
let regs = i2s7; let regs = i2s7;
I2sTx { i2s: regs } I2sTx { i2s: regs }
} }
@@ -740,55 +707,49 @@ fn main() -> ! {
clocks.support_usbhs_token().unwrap(), clocks.support_usbhs_token().unwrap(),
); );
defmt::info!("dma init");
let i2s_dma_addr = &i2s_peripheral.i2s.fifowr as *const _ as *mut u32;
let dma =
DmaRing::<N_SLOTS, BYTES_PER_SLOT>::new(hal.dma.release(), &mut syscon, i2s_dma_addr, 4)
.unwrap();
let dma_ref = DMA_RING.init(dma);
unsafe { DMA_RING_REF = Some(dma_ref) };
defmt::info!("audio init"); defmt::info!("audio init");
let mut audio = Audio { let mut audio = Audio {
state: Atomic::new(AudioState::Stopped), state: Atomic::new(AudioState::Stopped),
i2s: i2s_peripheral, i2s: i2s_peripheral,
dac: dac_impl, dac: dac_impl,
producer: QUEUE.stream_producer(), dma: dma_ring(),
fb: FeedbackState::default(), fb: FeedbackState::default(),
alt_setting: 0, alt_setting: 0,
nodata_timeout_frame: AtomicUsize::new(0), nodata_timeout_frame: AtomicUsize::new(0),
cur_rate: SAMPLE_RATE,
clock_pins: clock_sel_pins,
_marker: core::marker::PhantomData, _marker: core::marker::PhantomData,
}; };
audio.init(); audio.init();
let usb_bus = UsbBus::new(usb_peripheral, usb0_vbus_pin); let usb_bus = UsbBus::new(usb_peripheral, usb0_vbus_pin);
let mut clock = Clock {
pins: clock_sel_pins, let config = UsbAudioClassConfig::new(UsbSpeed::High, FunctionCode::Other, &mut audio)
cur_rate: SAMPLE_RATE, .with_output_config(
}; TerminalConfig::builder()
let config = AudioClassConfig::new(UsbSpeed::High, FunctionCode::Other, &mut clock, &mut audio) .base_id(2)
.with_output_config(TerminalConfig::new( .terminal_type(TerminalType::UsbUndefined)
2, .build(),
1, );
2,
FormatType1 {
bit_resolution: 32,
bytes_per_sample: 4,
},
TerminalType::OutHeadphones,
ChannelConfig::default_chans(2),
IsochronousSynchronizationType::Asynchronous,
LockDelay::Undefined(0),
None,
));
let mut uac2 = config.build(&usb_bus).unwrap(); let mut uac2 = config.build(&usb_bus).unwrap();
#[cfg(feature = "hid")] #[cfg(feature = "hid")]
let mut hid = HIDClass::new_ep_in(&usb_bus, AudioTelemetryReport::desc(), HID_INTERVAL_MS); let mut hid = HIDClass::new_ep_in(&usb_bus, AudioTelemetryReport::desc(), HID_INTERVAL_MS);
let mut usb_dev = UsbDeviceBuilder::new(&usb_bus, UsbVidPid(0x1209, 0xcc1d)) let mut usb_dev = usbd_uac2::builder(&usb_bus, UsbVidPid(0x1209, 0xcc1d))
.composite_with_iads()
.strings(&[StringDescriptors::default() .strings(&[StringDescriptors::default()
.manufacturer("VE7XEN") .manufacturer("VE7XEN")
.product("Guac Tortilla") .product("Guac Tortilla")])
.serial_number("123456789")])
.unwrap() .unwrap()
.max_packet_size_0(64) .max_packet_size_0(64)
.unwrap() .unwrap()
.device_class(0xef)
.device_sub_class(0x02)
.device_protocol(0x01)
.build(); .build();
#[cfg(feature = "hid")] #[cfg(feature = "hid")]