diff --git a/Cargo.lock b/Cargo.lock index dfbce63..4cf0ff6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,7 +181,7 @@ dependencies = [ "bare-metal", "bitfield 0.13.2", "critical-section", - "embedded-hal 0.2.7", + "embedded-hal", "volatile-register", ] @@ -292,12 +292,6 @@ dependencies = [ "void", ] -[[package]] -name = "embedded-hal" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89" - [[package]] name = "embedded-io" version = "0.7.1" @@ -365,7 +359,7 @@ dependencies = [ "cortex-m-rt", "defmt 1.0.1", "defmt-rtt", - "embedded-hal 1.0.0", + "embedded-hal", "embedded-io", "log-to-defmt", "lpc55-hal", @@ -491,12 +485,13 @@ dependencies = [ [[package]] name = "lpc55-hal" version = "0.5.0" +source = "git+https://github.com/ktims/lpc55-hal?branch=main#8dfefd62aff4abd2de535f23107812dda68437be" dependencies = [ "block-buffer", "cipher", "cortex-m", "digest", - "embedded-hal 0.2.7", + "embedded-hal", "embedded-time", "generic-array 1.4.1", "lpc55-pac", @@ -1084,6 +1079,8 @@ dependencies = [ [[package]] name = "usbd-uac2" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117c289dcd316caa7aca9c0909117d8cf9d35f3ed2e7a5739067f0bcedc93e35" dependencies = [ "byteorder-embedded-io", "defmt 1.0.1", diff --git a/Cargo.toml b/Cargo.toml index 97912af..e79b6e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,17 +18,18 @@ cortex-m = { version = "0.7.7", features = ["critical-section-single-core"] } cortex-m-rt = "0.7.5" defmt = "1.0.1" defmt-rtt = "1.1.0" -embedded-hal = "1.0.0" +embedded-hal = "0.2.7" embedded-io = "0.7.1" log-to-defmt = "0.1.0" -lpc55-hal = { version = "0.5.0", path = "../usbd_uac2/examples/lpc55-hal" } +# Includes update to usb-device 0.3, fix for isochronous and smaller critical sections +lpc55-hal = { git = "https://github.com/ktims/lpc55-hal", branch = "main" } nb = "1.1.0" panic-halt = "1.0.0" 
panic-probe = { version = "1.0.0", features = ["print-defmt"] } static_cell = "2.1.1" -usb-device = "0.3" +usb-device = { version = "0.3", features = ["control-buffer-256"] } usbd-hid = { version = "0.10.0", optional = true } -usbd-uac2 = { version = "0.1.0", path = "../usbd_uac2", features = ["defmt"]} +usbd-uac2 = { version = "0.1.0", features = ["defmt"]} [profile.release] opt-level = "z" diff --git a/src/dma.rs b/src/dma.rs new file mode 100644 index 0000000..e62f21d --- /dev/null +++ b/src/dma.rs @@ -0,0 +1,427 @@ +use hal::Syscon; +use hal::peripherals::syscon::ClockControl; + +use crate::{hal, pac}; +use core::cell::UnsafeCell; +use core::convert::Infallible; +use core::ptr::copy_nonoverlapping; +use core::sync::atomic::{AtomicUsize, Ordering, compiler_fence}; + +pub const DMA0_FLEXCOMM7_TX: u8 = 19; + +#[repr(C)] +#[derive(Copy, Clone)] +pub struct DmaDescriptor { + pub xfercfg: u32, + pub src_end: *const u8, + pub dst_end: *mut u32, + pub next: *const DmaDescriptor, +} + +impl defmt::Format for DmaDescriptor { + fn format(&self, fmt: defmt::Formatter) { + defmt::write!( + fmt, + "xfercfg={:x} src_end={:x} dst_end={:x} next={:x}", + self.xfercfg, + self.src_end, + self.dst_end, + self.next + ) + } +} + +// Channel descriptor table; linked from SRAMBASE +#[repr(C, align(512))] +pub struct DescriptorTable { + pub d: [DmaDescriptor; 32], +} +// Our ring that we will transition to once the transfer begins +#[repr(C)] +pub struct RingDescriptors { + pub d: [DmaDescriptor; N], +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct PushResult { + pub written: usize, + pub dropped: usize, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum ConfigError { + SlotTooLarge, + SlotTooSmall, + SlotNotAligned, + UnsupportedWidth, +} + +#[derive(Debug)] +pub enum DmaError { + Underrun, +} +impl core::error::Error for DmaError {} +impl core::fmt::Display for DmaError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + 
f.write_str("DmaUnderrun") + } +} + +/// Slot-based DMA ring +pub struct DmaRing { + dma: pac::DMA0, + + /// Destination peripheral register (FIFO write register) + dst_reg: *mut u32, + + // SAFETY: only written by USB task (on start) + pub(crate) channel_desc: UnsafeCell, + // SAFETY: only written by USB task (on start) + pub(crate) desc: UnsafeCell>, + slots: UnsafeCell<[[u8; MAX_SLOT_BYTES]; N]>, + + /// Effective bytes per slot. Maybe be smaller than MAX_SLOT_BYTES (e.g. at lower sample rates), as the setup is designed for constant rate not constant size. + slot_bytes: usize, + /// How many bytes to transfer to the FIFO + word_bytes: usize, + + // SAFETY: producer only + write_slot: UnsafeCell, + write_off: UnsafeCell, + + produced: AtomicUsize, + consumed: AtomicUsize, + + /// Leave at least one slot empty so producer never overwrites a slot DMA may still read. + safety_gap: usize, + pub produced_bytes: AtomicUsize, + pub consumed_bytes: AtomicUsize, +} + +impl DmaRing { + /// Construct using PAC DMA0 + &mut SYSCON + a destination FIFO register. 
+ pub fn new( + dma: pac::DMA0, + syscon: &mut Syscon, + dst_reg: *mut u32, + word_bytes: usize, + ) -> Result { + if word_bytes != 1 && word_bytes != 2 && word_bytes != 4 { + return Err(ConfigError::UnsupportedWidth); + } + // Start the DMA0 clock + dma.enable_clock(syscon); + + Ok(Self { + dma, + dst_reg: dst_reg, + channel_desc: UnsafeCell::new(DescriptorTable { + d: [DmaDescriptor { + xfercfg: 0, + src_end: core::ptr::null(), + dst_end: core::ptr::null_mut(), + next: core::ptr::null(), + }; 32], + }), + desc: UnsafeCell::new(RingDescriptors { + d: [DmaDescriptor { + xfercfg: 0, + src_end: core::ptr::null(), + dst_end: core::ptr::null_mut(), + next: core::ptr::null(), + }; N], + }), + slots: UnsafeCell::new([[0u8; MAX_SLOT_BYTES]; N]), + slot_bytes: MAX_SLOT_BYTES, + word_bytes, + write_slot: UnsafeCell::new(0), + write_off: UnsafeCell::new(0), + produced: AtomicUsize::new(0), + consumed: AtomicUsize::new(0), + safety_gap: 1, + produced_bytes: AtomicUsize::new(0), + consumed_bytes: AtomicUsize::new(0), + }) + } + + /// Optional: adjust safety gap (defaults to 1 empty slot). + pub fn set_safety_gap(&mut self, gap_slots: usize) { + self.safety_gap = gap_slots.min(N); + } + pub fn slot_size(&self) -> usize { + self.slot_bytes + } + pub fn set_slot_size(&mut self, slot_bytes: usize) -> Result<(), ConfigError> { + if slot_bytes == 0 { + return Err(ConfigError::SlotTooSmall); + } + if slot_bytes > MAX_SLOT_BYTES { + return Err(ConfigError::SlotTooLarge); + } + if slot_bytes % self.word_bytes != 0 { + return Err(ConfigError::SlotNotAligned); + } + self.slot_bytes = slot_bytes; + self.reset_producer(); + Ok(()) + } + + /// Producer: copy into ring; commits whole slots; reports overflow by returning dropped bytes. 
+ pub fn push(&self, mut data: &[u8]) -> PushResult { + let mut written = 0usize; + + let write_slot = unsafe { &mut *self.write_slot.get() }; + let write_off = unsafe { &mut *self.write_off.get() }; + + let slots = unsafe { &mut *self.slots.get() }; + defmt::debug!( + "produced={} consumed={} fill={}", + self.produced(), + self.consumed(), + self.fill_slots() + ); + while !data.is_empty() { + if self.is_full_for_producer() { + break; + } + + let cap = self.slot_bytes - *write_off; + let n = core::cmp::min(cap, data.len()); + + unsafe { + let dst = slots[*write_slot].as_mut_ptr().add(*write_off); + copy_nonoverlapping(data.as_ptr(), dst, n); + } + + *write_off += n; + written += n; + data = &data[n..]; + + if *write_off == self.slot_bytes { + // publish completed slot + compiler_fence(Ordering::Release); + self.produced.fetch_add(1, Ordering::Release); + + *write_slot = (*write_slot + 1) % N; + *write_off = 0; + } + } + + self.produced_bytes.fetch_add(written, Ordering::Release); + + PushResult { + written, + dropped: data.len(), + } + } + + /// Call from DMA IRQ bookkeeping when a slot has been consumed. 
    ///
    /// Adds `slots` to the `consumed` slot counter and `slots * slot_bytes` to the
    /// `consumed_bytes` counter, provided at least one produced slot is still outstanding.
    ///
    /// # Errors
    ///
    /// Returns [`DmaError::Underrun`] (after logging an error) when `consumed` has already
    /// caught up with `produced`; neither counter is changed in that case.
    ///
    /// NOTE(review): only "at least one slot outstanding" is checked, so a `slots` value larger
    /// than the outstanding count would move `consumed` past `produced` — confirm callers only
    /// ever pass 1.
    pub fn advance_consumed(&self, slots: usize) -> Result<(), DmaError> {
        let produced = self.produced.load(Ordering::Acquire);
        let consumed = self.consumed.load(Ordering::Relaxed);
        if consumed < produced {
            self.consumed.fetch_add(slots, Ordering::Release);
            self.consumed_bytes
                .fetch_add(slots * self.slot_bytes, Ordering::Relaxed);
            Ok(())
        } else {
            defmt::error!("DMA underrun!");
            Err(DmaError::Underrun)
        }
    }

    /// Number of slots published by the producer so far.
    pub fn produced(&self) -> usize {
        self.produced.load(Ordering::Acquire)
    }
    /// Number of bytes accepted by `push` so far (includes a partially filled slot).
    pub fn produced_bytes(&self) -> usize {
        self.produced_bytes.load(Ordering::Acquire)
    }
    /// Number of slots marked consumed via `advance_consumed`.
    pub fn consumed(&self) -> usize {
        self.consumed.load(Ordering::Acquire)
    }
    /// Estimates how many bytes have been read out of the ring, including progress inside
    /// the slot currently being transferred.
    ///
    /// The result is `consumed * slot_bytes` plus the part of the active slot already moved,
    /// derived from the live XFERCOUNT field (bits 25:16) of channel 19's XFERCFG register.
    /// The slot counter is sampled before and after two back-to-back register reads, and the
    /// loop retries until both pairs agree, so the pieces belong to one consistent snapshot.
    ///
    /// This method does not read the `consumed_bytes` atomic field.
    pub fn consumed_bytes(&self) -> usize {
        loop {
            let consumed_start = self.consumed.load(Ordering::Acquire);

            // Precedence: `as` binds tighter than `>>`, which binds tighter than `&`,
            // i.e. ((bits as usize) >> 16) & 0x3ff.
            let reg_1 = self.dma.channel19.xfercfg.read().bits() as usize >> 16 & 0x3ff;
            let reg_2 = self.dma.channel19.xfercfg.read().bits() as usize >> 16 & 0x3ff;

            let consumed_end = self.consumed.load(Ordering::Acquire);

            if consumed_start == consumed_end && reg_1 == reg_2 {
                // 1. Map the hardware remaining countdown into a clean byte count
                // NOTE(review): a slot of exactly 1024 transfers would also start out at
                // 0x3ff and be read as "finished" here — confirm slot sizes stay below that.
                let remaining_bytes = if reg_1 == 0x3ff {
                    0 // 0x3FF means all transfers completed, 0 bytes remaining
                } else {
                    // Formula from NXP manual: (XFERCOUNT + 1) * Data Width
                    (reg_1 + 1) * self.word_bytes
                };

                // 2. Total bytes consumed in this specific active slot
                // NOTE(review): this subtraction underflows if the live count is ever larger
                // than the configured slot (e.g. slot size changed without re-running
                // `init`) — consider `saturating_sub`.
                let active_slot_consumed = self.slot_bytes - remaining_bytes;

                // 3. Combine with the software slot counter sampled above
                return consumed_start * self.slot_bytes + active_slot_consumed;
            }
        }
    }

    /// Number of published slots not yet marked consumed (wrapping difference of the counters).
    pub fn fill_slots(&self) -> usize {
        self.produced().wrapping_sub(self.consumed())
    }

    /// Builds the descriptors and programs DMA channel 19 (`DMA0_FLEXCOMM7_TX`) without
    /// enabling it; `run` sets the channel enable bit afterwards.
    pub fn init(&self) {
        self.init_descriptors();

        // Descriptor table base
        let desc = unsafe { &*self.desc.get() };
        // NOTE(review): pointer-to-u32 cast assumes 32-bit addresses on the target.
        let base = self.channel_desc.get() as u32;
        self.dma.srambase.write(|w| unsafe { w.bits(base) });
        // Peripheral request enabled, hardware trigger disabled.
        self.dma
            .channel19
            .cfg
            .write(|w| w.periphreqen().enabled().hwtrigen().disabled());
        // The table entry for channel 19 has its xfercfg zeroed by `init_descriptors`, so the
        // first transfer configuration is written to the channel register directly.
        self.dma
            .channel19
            .xfercfg
            .write(|w| unsafe { w.bits(desc.d[0].xfercfg) });

        // Channel stays disabled while the controller is enabled and the channel is marked
        // valid, its interrupt enabled and its trigger set.
        self.dma.enableclr0.write(|w| unsafe { w.bits(1 << 19) });
        self.dma.ctrl.write(|w| w.enable().enabled());
        self.dma.setvalid0.write(|w| unsafe { w.bits(1 << 19) });
        self.dma.intenset0.write(|w| unsafe { w.bits(1 << 19) });

        self.dma.settrig0.write(|w| unsafe { w.bits(1 << 19) });
    }

    /// Sets the enable bit for channel 19.
    pub fn run(&self) {
        self.dma.enableset0.write(|w| unsafe { w.bits(1 << 19) });
    }

    /// Clears the channel 19 enable bit, busy-waits until its BUSY0 bit reads zero, writes the
    /// abort bit and then resets the producer cursor and all counters.
    pub fn stop(&self) {
        self.dma.enableclr0.write(|w| unsafe { w.bits(1 << 19) });
        // `<<` binds tighter than `&`: bits & (1 << 19).
        // NOTE(review): the turbofish below is empty as written; presumably
        // `nb::Error::<Infallible>` (Infallible is imported at the top of the file) — confirm.
        // NOTE(review): the `Result` produced by `nb::block!` is discarded.
        nb::block!(if (self.dma.busy0.read().bits() & 1 << 19) == 0 {
            Ok(())
        } else {
            Err(nb::Error::::WouldBlock)
        });
        self.dma.abort0.write(|w| unsafe { w.bits(1 << 19) });
        self.reset_producer();
    }

    /// Zeroes the producer cursor and the produced/consumed slot and byte counters.
    ///
    /// NOTE(review): writes the same fields with the same values as
    /// `reset_producer_init_only`; the two could share one implementation.
    fn reset_producer(&self) {
        unsafe {
            *(&mut *self.write_slot.get()) = 0;
            *(&mut *self.write_off.get()) = 0;
        }
        self.produced.store(0, Ordering::Relaxed);
        self.produced_bytes.store(0, Ordering::Relaxed);
        self.consumed.store(0, Ordering::Relaxed);
        self.consumed_bytes.store(0, Ordering::Relaxed);
    }

    /// True once the number of unconsumed published slots reaches `N - safety_gap`.
    fn is_full_for_producer(&self) -> bool {
        let fill = self.fill_slots();
        fill >= N.wrapping_sub(self.safety_gap)
    }
    /// Zeroes the producer cursor and all counters; called at the end of `init_descriptors`.
    fn reset_producer_init_only(&self) {
        unsafe {
            *self.write_slot.get() = 0;
        }
        unsafe {
            *self.write_off.get() = 0;
        }

        self.produced.store(0, Ordering::Relaxed);
        self.consumed.store(0, Ordering::Relaxed);

        self.produced_bytes.store(0, Ordering::Relaxed);
        self.consumed_bytes.store(0, Ordering::Relaxed);
    }

    /// Zero-fills the slots, builds one descriptor per slot linked into a ring, seeds the
    /// channel 19 entry of the descriptor table, and resets the producer state.
    fn init_descriptors(&self) {
        let slots = unsafe { &mut *self.slots.get() };
        let desc = unsafe { &mut *self.desc.get() };
        let chan_desc = unsafe { &mut *self.channel_desc.get() };
        defmt::debug!("slots base: &{:x}", self.slots.get());

        // Pre-fill with silence so underrun replays silence.
        for i in 0..N {
            slots[i][..self.slot_bytes].fill(0);
        }

        // Number of word-sized transfers needed to move one slot.
        let transfers = (self.slot_bytes / self.word_bytes) as u32;

        for i in 0..N {
            let src_start = slots[i].as_ptr() as usize;
            // Address of the last word of the slot (presumably what the controller expects
            // as the source "end" address — confirm against the DMA chapter).
            let src_end = (src_start + self.slot_bytes - self.word_bytes) as *const u8;

            // Link to the following descriptor; the last one links back to the first.
            let next = &desc.d[(i + 1) % N] as *const DmaDescriptor;

            desc.d[i] = DmaDescriptor {
                xfercfg: encode_xfercfg(
                    true,  // valid
                    true,  // reload
                    false, // swtrig: not set in the descriptor; `init` writes SETTRIG0 instead
                    false, // clrtrig
                    true,  // intA
                    false, // intB
                    self.word_bytes as u32,
                    1, // src_inc
                    0, // dst_inc
                    transfers,
                ),
                src_end,
                dst_end: self.dst_reg,
                next,
            };
        }
        // The channel 19 table entry is a copy of the first ring descriptor with its
        // xfercfg word cleared.
        chan_desc.d[19] = desc.d[0];
        chan_desc.d[19].xfercfg = 0;

        // reset producer indices + counters (init-only action)
        self.reset_producer_init_only();
    }
}

// NOTE(review): no SAFETY justification is given for this impl. It presumably relies on the
// "producer only" / "only written by USB task" discipline noted on the struct fields — confirm.
// The const generic parameters also appear to be missing from this line as shown.
unsafe impl Sync for DmaRing {}

/// XFERCFG encoding follows the common LPC DMA layout:
/// - CFGVALID at bit0, RELOAD at bit1, SWTRIG at bit2, CLRTRIG at bit3
/// - SETINTA at bit4, SETINTB at bit5
/// - WIDTH at bits 9:8
/// - SRCINC at bits 13:12
/// - DSTINC at bits 15:14
/// - XFERCOUNT at bits 25:16
/// This layout is shown in LPC DMA examples.
[5](https://www.kernel.org/doc/html/latest/core-api/dma-api-howto.html) +fn encode_xfercfg( + cfgvalid: bool, + reload: bool, + swtrig: bool, + clrtrig: bool, + inta: bool, + intb: bool, + width_bytes: u32, + src_inc: u32, + dst_inc: u32, + transfers: u32, +) -> u32 { + let width_code = match width_bytes { + 1 => 0, + 2 => 1, + 4 => 2, + _ => 0, + }; + + let count_field = transfers.saturating_sub(1) & 0x3FF; + + ((cfgvalid as u32) << 0) + | ((reload as u32) << 1) + | ((swtrig as u32) << 2) + | ((clrtrig as u32) << 3) + | ((inta as u32) << 4) + | ((intb as u32) << 5) + | ((width_code & 0x3) << 8) + | ((src_inc & 0x3) << 12) + | ((dst_inc & 0x3) << 14) + | (count_field << 16) +} diff --git a/src/main.rs b/src/main.rs index 5116160..b86ed0d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,12 +8,8 @@ fn panic() -> ! { } use atomic::Atomic; -use bbqueue::nicknames::Churrasco; -use bbqueue::prod_cons::stream::{StreamConsumer, StreamProducer}; -use bbqueue::traits::bbqhdl::BbqHandle; -use bbqueue::traits::coordination::ReadGrantError; use bytemuck::NoUninit; -use core::sync::atomic::{AtomicBool, AtomicI32, AtomicU32, AtomicUsize, Ordering}; +use core::sync::atomic::{AtomicBool, AtomicI32, AtomicUsize, Ordering}; use cortex_m_rt::entry; use defmt; use defmt::debug; @@ -26,24 +22,23 @@ use hal::raw as pac; use hal::time::{Hertz, Microseconds}; use hal::typestates::pin::state::Gpio; use lpc55_hal as hal; -use lpc55_hal::raw::NVIC; -use lpc55_hal::raw::sdif::FIFO; use pac::interrupt; +use static_cell::StaticCell; use usb_device::{ bus::{self}, - device::{StringDescriptors, UsbDeviceBuilder, UsbVidPid}, - endpoint::IsochronousSynchronizationType, + device::{StringDescriptors, UsbVidPid}, }; #[cfg(feature = "hid")] use usbd_hid::{descriptor::SerializedDescriptor, hid_class::HIDClass}; -use usbd_uac2::UsbIsochronousFeedback; use usbd_uac2::{ - self, AudioClassConfig, RangeEntry, TerminalConfig, UsbAudioClass, UsbAudioClockImpl, UsbSpeed, + self, AudioHandler, ClockSource, 
RangeEntry, TerminalConfig, UsbAudioClassConfig, + UsbAudioClassError, UsbIsochronousFeedback, UsbSpeed, constants::{FunctionCode, TerminalType}, - descriptors::{ChannelConfig, ClockType, FormatType1, LockDelay}, + descriptors::ClockType, }; use crate::dac::DacImpl; +use crate::dma::DmaRing; use crate::hid::AudioTelemetryReport; use crate::traits::Dac; @@ -63,20 +58,26 @@ pub mod dac { pub use self::noop::NoopDac as DacImpl; } +mod dma; #[cfg(feature = "hid")] mod hid; mod hw; mod traits; -// Fo = M/(N*2*P) * Fin -// Fo = 3072/(125*2*8) * 16MHz = 24.576MHz -// -const FIFO_LENGTH: usize = 256; // frames -const QUEUE_RUNNING_UP: usize = (FIFO_LENGTH * 4) / 10; // 40% -const QUEUE_RUNNING_DOWN: usize = (FIFO_LENGTH * 2) / 10; // 20% +const BYTES_PER_SAMPLE: usize = 4; // 32 bit samples +const BYTES_PER_FRAME: usize = BYTES_PER_SAMPLE * 2; // 2 channels +const FRAMES_PER_SLOT: usize = SAMPLE_RATE as usize / 2000; // run the DMA at 2khz +const BYTES_PER_SLOT: usize = FRAMES_PER_SLOT * BYTES_PER_FRAME; +const N_SLOTS: usize = 8; +const FILL_TARGET_BYTES: i32 = (BYTES_PER_SLOT * N_SLOTS) as i32 / 2; +const USB_FRAME_RATE: u32 = 8000; // microframe rate: 8000 for HS, 1000 for FS + +// In frames +const QUEUE_RUNNING_UP: usize = ((FRAMES_PER_SLOT * N_SLOTS) * 4) / 10; // 40% +const QUEUE_RUNNING_DOWN: usize = ((FRAMES_PER_SLOT * N_SLOTS) * 2) / 10; // 20% const NODATA_TIMEOUT_FRAMES: usize = SAMPLE_RATE as usize / 100; // ~100ms const MCLK_FREQ: u32 = 24576000; -const SAMPLE_RATE: u32 = 88200; +const SAMPLE_RATE: u32 = 192000; const HID_INTERVAL_MS: u8 = 100; struct CodecPins { @@ -88,63 +89,6 @@ struct ClockSelPins { sel_22m: Pin>, } -struct Clock { - pins: ClockSelPins, - cur_rate: u32, -} -impl Clock { - const RATES: [RangeEntry; 1] = [RangeEntry::new_fixed(SAMPLE_RATE)]; -} -impl UsbAudioClockImpl for Clock { - const CLOCK_TYPE: usbd_uac2::descriptors::ClockType = ClockType::InternalFixed; - const SOF_SYNC: bool = false; - fn get_sample_rate(&self) -> u32 { - 
self.cur_rate - } - fn set_sample_rate( - &mut self, - sample_rate: u32, - ) -> core::result::Result<(), usbd_uac2::UsbAudioClassError> { - if 24_576_000u32.is_multiple_of(sample_rate) { - defmt::info!("[clock] 24M clock selected"); - self.pins.sel_22m.set_low().ok(); - // hal::wait_at_least(1); - self.pins.sel_24m.set_high().ok(); - } else { - defmt::info!("[clock] 22M clock selected"); - self.pins.sel_24m.set_low().ok(); - // hal::wait_at_least(1); - self.pins.sel_22m.set_high().ok(); - }; - self.cur_rate = sample_rate; - Ok(()) - } - fn get_rates( - &self, - ) -> core::result::Result<&[usbd_uac2::RangeEntry], usbd_uac2::UsbAudioClassError> { - Ok(&Clock::RATES) - } - fn get_clock_validity(&self) -> core::result::Result { - Ok(true) - } - fn alt_setting( - &mut self, - alt_setting: u8, - ) -> core::result::Result<(), usbd_uac2::UsbAudioClassError> { - match alt_setting { - 0 => { - self.pins.sel_22m.set_low().ok(); - self.pins.sel_24m.set_low().ok(); - } - 1 => { - self.set_sample_rate(self.cur_rate).ok(); - } - _ => {} - }; - Ok(()) - } -} - #[derive(Default)] struct PerfCounters { received_frames: AtomicUsize, @@ -160,8 +104,10 @@ impl PerfCounters { fn reset(&self) { self.received_frames.store(0, Ordering::Relaxed); self.played_frames.store(0, Ordering::Relaxed); - self.min_fill.store(FIFO_LENGTH, Ordering::Relaxed); - self.avg_fill.store(FIFO_LENGTH / 2, Ordering::Relaxed); + self.min_fill + .store(N_SLOTS * BYTES_PER_SLOT, Ordering::Relaxed); + self.avg_fill + .store(FILL_TARGET_BYTES as usize, Ordering::Relaxed); self.queue_underflows.store(0, Ordering::Relaxed); self.queue_overflows.store(0, Ordering::Relaxed); self.audio_underflows.store(0, Ordering::Relaxed); @@ -193,91 +139,69 @@ impl defmt::Format for PerfCounters { } } -const BYTES_PER_FRAME: usize = 8; -const QUEUE_BYTES: usize = FIFO_LENGTH * BYTES_PER_FRAME; -// We use bbqueue here for performance in the USB driver that runs almost entirely in interrupt free critical section. 
-static QUEUE: Churrasco = Churrasco::new(); -// Used for feedback calculation of current fifo state -static PRODUCED: AtomicU32 = AtomicU32::new(0); -static CONSUMED: AtomicU32 = AtomicU32::new(0); - static PERF: PerfCounters = PerfCounters { - received_frames: AtomicUsize::new(0), // received from USB - played_frames: AtomicUsize::new(0), // played audio frames - min_fill: AtomicUsize::new(FIFO_LENGTH), // not recording this for now, need to figure out how to make it meaningful, since the queue starts empty - avg_fill: AtomicUsize::new(FIFO_LENGTH / 2), + received_frames: AtomicUsize::new(0), // received from USB + played_frames: AtomicUsize::new(0), // played audio frames + min_fill: AtomicUsize::new(0), // not recording this for now, need to figure out how to make it meaningful, since the queue starts empty + avg_fill: AtomicUsize::new(FILL_TARGET_BYTES as usize), queue_underflows: AtomicUsize::new(0), // ditto here, since we underflow at startup, but we record this one as it can be trended queue_overflows: AtomicUsize::new(0), audio_underflows: AtomicUsize::new(0), }; -fn cur_fill() -> u32 { - PRODUCED - .load(Ordering::Acquire) - .wrapping_sub(CONSUMED.load(Ordering::Acquire)) - / BYTES_PER_FRAME as u32 +static DMA_RING: StaticCell> = StaticCell::new(); +static mut DMA_RING_REF: Option<&'static DmaRing> = None; +#[inline] +fn dma_ring() -> &'static DmaRing { + unsafe { DMA_RING_REF.unwrap() } } -#[inline] -fn try_write_one_frame( - cons: &mut StreamConsumer, - i2s: &pac::i2s7::RegisterBlock, -) -> bool { - match cons.read() { - Ok(rgr) => { - // TODO: Fix this to handle the case where frame lands on a ring buffer boundary (if it is possible) - if rgr.len() >= BYTES_PER_FRAME { - let l = u32::from_le_bytes(rgr[0..4].try_into().unwrap()); - let r = u32::from_le_bytes(rgr[4..8].try_into().unwrap()); +fn cur_fill() -> usize { + let produced_bytes = dma_ring().produced_bytes() as u32; + let consumed_bytes = dma_ring().consumed_bytes() as u32; - 
i2s.fifowr.write(|w| unsafe { w.bits(l) }); - i2s.fifowr.write(|w| unsafe { w.bits(r) }); - - // consume exactly one frame (8 bytes) - rgr.release(BYTES_PER_FRAME); - PERF.played_frames.fetch_add(1, Ordering::Relaxed); - CONSUMED.fetch_add(BYTES_PER_FRAME as u32, Ordering::Relaxed); - return true; - } else { - // Not enough bytes for a full frame: leave it in the queue. - return false; - } - } - Err(ReadGrantError::Empty) => { - return false; - } - Err(e) => { - defmt::error!("Unexpected queue read error") - } - } - false + // Handle rollover properly + produced_bytes.wrapping_sub(consumed_bytes) as usize } #[interrupt] -fn FLEXCOMM7() { - let i2s = unsafe { &*pac::I2S7::ptr() }; - defmt::info!("isr"); +fn DMA0() { + defmt::debug!("dma0"); + let dma = unsafe { &*pac::DMA0::ptr() }; - if i2s.fifostat.read().txlvl().bits() == 0 { - // ISR was not serviced before the FIFO drained - PERF.audio_underflows.fetch_add(1, Ordering::Relaxed); + let inta = dma.inta0.read().bits(); + let err = dma.errint0.read().bits(); + + // TODO: figure out how to track underflows properly + if (err & (1 << 19)) != 0 { + let live = dma.channel19.xfercfg.read().bits(); + + let desc = unsafe { &*dma_ring().channel_desc.get() }; + let mem = desc.d[19]; + defmt::error!( + "DMA error ch19: live={=u32:08x} INTA={=u32:x} ERR={=u32:x}\n desc: {}", + live, + inta, + err, + mem + ); + // red_led().on(); + dma.errint0.write(|w| unsafe { w.bits(1 << 19) }); } - // refil the buffer to 4 frames / 8 samples - let mut cons = QUEUE.stream_consumer(); - while i2s.fifostat.read().txlvl().bits() <= 6 { - if !try_write_one_frame(&mut cons, i2s) { - // No complete frame available: write silence to keep FIFO above threshold or we will - // get stuck in the ISR. 
- PERF.queue_underflows.fetch_add(1, Ordering::Relaxed); - i2s.fifowr.write(|w| unsafe { w.bits(0) }); - i2s.fifowr.write(|w| unsafe { w.bits(0) }); - break; + if (inta & (1 << 19)) != 0 { + dma.inta0.write(|w| unsafe { w.bits(1 << 19) }); + if dma_ring().advance_consumed(1).is_err() { + // red_led().on(); + } else { + PERF.played_frames + .fetch_add(FRAMES_PER_SLOT, Ordering::Relaxed); } } } + #[repr(u8)] -#[derive(Clone, Copy, NoUninit)] +#[derive(Clone, Copy, NoUninit, Eq, PartialEq)] enum AudioState { /// Knowingly stopped, ie. AltSetting=0. DAC muted, I2S disabled. /// @@ -343,7 +267,7 @@ impl FeedbackState { self.correction_enabled.store(false, Ordering::Relaxed); self.integrator.store(0, Ordering::Relaxed); self.filtered_fill - .store(FIFO_LENGTH as i32 / 2, Ordering::Relaxed); + .store(FILL_TARGET_BYTES, Ordering::Relaxed); } } impl Default for FeedbackState { @@ -351,22 +275,25 @@ impl Default for FeedbackState { Self { correction_enabled: AtomicBool::new(false), integrator: AtomicI32::new(0), - filtered_fill: AtomicI32::new(FIFO_LENGTH as i32 / 2), + filtered_fill: AtomicI32::new(FILL_TARGET_BYTES), } } } -struct Audio, I> { +struct Audio<'a, D: Dac, I> { state: Atomic, alt_setting: u8, i2s: I2sTx, dac: D, - producer: StreamProducer, + dma: &'a DmaRing, fb: FeedbackState, nodata_timeout_frame: AtomicUsize, + cur_rate: u32, + clock_pins: ClockSelPins, _marker: core::marker::PhantomData, } -impl, I> Audio { +impl, I> Audio<'_, D, I> { + const RATES: [RangeEntry; 1] = [RangeEntry::new_fixed(SAMPLE_RATE)]; /// Perform a state transition to `state` fn transition(&mut self, state: AudioState) { defmt::info!( @@ -425,14 +352,13 @@ impl, I> Audio { .normal() }); self.dac.init(); - self.dac.change_rate(SAMPLE_RATE); } ///Transition -> Stopped: ///clear queue, mute DAC, mask I2S ISR, stop I2S peripheral, disable & reset feedback and performance queues fn stop(&mut self) { + dma_ring().stop(); + pac::NVIC::mask(pac::Interrupt::DMA0); self.dac.mute(); - // Disable 
level interrupt on I2S - self.i2s.i2s.fifointenclr.write(|w| w.txlvl().set_bit()); // Clear any samples in the FIFO self.i2s.i2s.fifocfg.modify(|_, w| w.emptytx().set_bit()); // Disable I2S @@ -441,17 +367,17 @@ impl, I> Audio { self.fb.reset(); // reset performance counters PERF.reset(); - // Drain anything left in the queue - while let Ok(d) = QUEUE.stream_consumer().read() { - let len = d.len(); - d.release(len) - } + // Stop the clocks + self.clock_pins.sel_22m.set_low().ok(); + self.clock_pins.sel_24m.set_low().ok(); } ///Transition -> Armed - /// Start I2S peripheral. Since we assume we have interrupts disabled at + /// Start I2S peripheral and MCLK. Since we assume we have interrupts disabled at /// this point (as we came from Stopped), and the FIFO is empty, this will /// play out 0s. fn arm(&mut self) { + dma_ring().init(); + self.set_sample_rate(self.cur_rate).ok(); self.i2s.i2s.cfg1.modify(|_, w| w.mainenable().enabled()); } ///Transition -> Prefill @@ -468,8 +394,14 @@ impl, I> Audio { .i2s .fifotrig .modify(|_, w| unsafe { w.txlvl().bits(6).txlvlena().enabled() }); - // FIFO level interrupt enable - self.i2s.i2s.fifointenset.modify(|_, w| w.txlvl().enabled()); + self.i2s + .i2s + .fifocfg + .modify(|_, w| w.enabletx().enabled().dmatx().enabled()); + dma_ring().run(); + unsafe { + pac::NVIC::unmask(pac::Interrupt::DMA0); + } } ///Transition->NoData ///store framecount at transition so we can time out recovery @@ -480,14 +412,49 @@ impl, I> Audio { ); } } -impl, I, B: bus::UsbBus> UsbAudioClass<'_, B> for Audio { +impl, I> ClockSource for Audio<'_, D, I> { + const CLOCK_TYPE: usbd_uac2::descriptors::ClockType = ClockType::InternalFixed; + const SOF_SYNC: bool = false; + + fn sample_rate(&self) -> u32 { + self.cur_rate + } + fn set_sample_rate( + &mut self, + sample_rate: u32, + ) -> core::result::Result<(), usbd_uac2::UsbAudioClassError> { + if 24_576_000u32.is_multiple_of(sample_rate) { + defmt::info!("[clock] 24M clock selected"); + 
self.clock_pins.sel_22m.set_low().ok(); + // hal::wait_at_least(1); + self.clock_pins.sel_24m.set_high().ok(); + } else { + defmt::info!("[clock] 22M clock selected"); + self.clock_pins.sel_24m.set_low().ok(); + // hal::wait_at_least(1); + self.clock_pins.sel_22m.set_high().ok(); + }; + self.dac.change_rate(sample_rate); + self.cur_rate = sample_rate; + Ok(()) + } + fn sample_rates( + &self, + ) -> core::result::Result<&[usbd_uac2::RangeEntry], usbd_uac2::UsbAudioClassError> { + Ok(&Self::RATES) + } + fn clock_validity(&self) -> Result { + Ok(true) + } +} +impl, I, B: bus::UsbBus> AudioHandler<'_, B> for Audio<'_, D, I> { fn alternate_setting_changed(&mut self, _terminal: usb_device::UsbDirection, alt_setting: u8) { let state = self.state.load(Ordering::Relaxed); match (alt_setting, state) { (0, AudioState::Armed | AudioState::Prefill | AudioState::NoData) => { self.transition(AudioState::Stopped) } - (0, AudioState::Running) => {} // noop, we naturally transition through LowData to Stopped + (0, AudioState::Running | AudioState::Stopped) => {} // noop, we naturally transition through LowData to Stopped (1, AudioState::Stopped) => self.transition(AudioState::Armed), (1, _) => {} // altSetting 1 in any other state is a no-op (_, _) => { @@ -501,27 +468,32 @@ impl, I, B: bus::UsbBus> UsbAudioClass<'_, B> for Audio< ep: &usb_device::endpoint::Endpoint<'_, B, usb_device::endpoint::Out>, ) { let state = self.state.load(Ordering::Relaxed); - let mut buf = [0; SAMPLE_RATE as usize / 1000 * 64]; + let mut buf = [0; (SAMPLE_RATE.div_ceil(USB_FRAME_RATE) + 1) as usize * BYTES_PER_FRAME]; let len = match ep.read(&mut buf) { Ok(len) => len, - Err(e) => { - defmt::error!("usb error in rx callback {:?}", e); + Err(_) => { + defmt::error!("usb error in rx callback"); return; } }; let buf = &buf[..len]; + let res = self.dma.push(buf); - if let Ok(mut wg) = self.producer.grant_exact(buf.len()) { - wg.copy_from_slice(buf); - wg.commit(buf.len()); - PRODUCED.fetch_add(buf.len() as 
u32, Ordering::Relaxed); - PERF.received_frames - .fetch_add(buf.len() / BYTES_PER_FRAME, Ordering::Relaxed); - } else { - PERF.queue_overflows.fetch_add(1, Ordering::Relaxed); - // defmt::error!("overflowed bbq, asked {}", buf.len()); + if res.dropped != 0 { + // Overflow: some or all bytes couldn't be queued. + defmt::error!( + "overflowed dma ring, asked {}, wrote {}, dropped {}", + buf.len(), + res.written, + res.dropped + ); + PERF.queue_overflows + .fetch_add(res.dropped / BYTES_PER_FRAME, Ordering::Relaxed); } + PERF.received_frames + .fetch_add(res.written / BYTES_PER_FRAME, Ordering::Relaxed); + // Valid states here are Armed, Prefill, Running, Draining and NoData match state { AudioState::Stopped => { @@ -553,47 +525,44 @@ impl, I, B: bus::UsbBus> UsbAudioClass<'_, B> for Audio< AudioState::NoData => self.transition(AudioState::LowData), } } + fn audio_data_tx( + &mut self, + _ep: &usb_device::endpoint::Endpoint<'_, B, usb_device::endpoint::In>, + ) { + } fn feedback(&mut self, nominal_rate: UsbIsochronousFeedback) -> Option { - let target = FIFO_LENGTH as i32 / 2 - nominal_rate.int as i32; - - let fill = cur_fill() as i32; - let prev = self.fb.filtered_fill.load(Ordering::Relaxed); - let filtered = prev + ((fill - prev) >> 4); // ~1/16 smoothing - self.fb.filtered_fill.store(filtered, Ordering::Relaxed); - - let error = filtered - target; - - // Clamp startup excursions. 
- let error = error.clamp(-(nominal_rate.int as i32 * 4), nominal_rate.int as i32 * 4); - - // Reset integrator when the error is small - if error.abs() < 2 { - self.fb.integrator.store(0, Ordering::Relaxed); + if !self.fb.correction_enabled.load(Ordering::Relaxed) { + return Some(nominal_rate); } - let mut integrator = self.fb.integrator.load(Ordering::Relaxed); - integrator = integrator - (integrator >> 6); // ~1/64 leak, reduce windup - integrator = integrator.clamp(-256, 256); - self.fb.integrator.store(integrator, Ordering::Relaxed); + let current_bytes = cur_fill() as i32; - // gains - let p = error << 3; - let i = integrator << 2; + if current_bytes == 0 { + defmt::error!("[fb] dma underrun detected!"); + PERF.queue_underflows.fetch_add(1, Ordering::Relaxed); + return Some(nominal_rate); + } + PERF.avg_fill + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |v| { + Some(((v << 6) - v + current_bytes as usize) >> 6) + }) + .ok(); + + // normalize error wrt. frame size etc. + let error_permille = ((current_bytes - FILL_TARGET_BYTES) * 1000) / FILL_TARGET_BYTES; - let correction = -((p + i) >> 2); let nominal_v = nominal_rate.to_u32_12_13() as i32; - let mut v = nominal_v + correction; + // 0.2% which is a huge clock error + let max_allowed_deviation = nominal_v / 500; - // Tight clamp around nominal. 
- v = v.clamp(nominal_v - (1 << 12), nominal_v + (1 << 12)); + let p_term = -(error_permille * nominal_v) / 256000; // this works reasonably well to keep the buffer + let i_term = 0; // placeholder - defmt::debug!( - "fill:{} err:{} int:{} fb:{=i32:x}", - fill, - error, - integrator, - v + let mut v = nominal_v + p_term + i_term; + v = v.clamp( + nominal_v - max_allowed_deviation, + nominal_v + max_allowed_deviation, ); Some(UsbIsochronousFeedback::new(v as u32)) @@ -640,8 +609,6 @@ pub fn init_i2s(mut fc7: pac::FLEXCOMM7, i2s7: pac::I2S7, syscon: &mut Syscon) - // Select I2S TX function fc7.pselid.write(|w| w.persel().i2s_transmit()); - unsafe { NVIC::unmask(interrupt::FLEXCOMM7) } - let regs = i2s7; I2sTx { i2s: regs } } @@ -740,55 +707,49 @@ fn main() -> ! { clocks.support_usbhs_token().unwrap(), ); + defmt::info!("dma init"); + let i2s_dma_addr = &i2s_peripheral.i2s.fifowr as *const _ as *mut u32; + let dma = + DmaRing::::new(hal.dma.release(), &mut syscon, i2s_dma_addr, 4) + .unwrap(); + let dma_ref = DMA_RING.init(dma); + unsafe { DMA_RING_REF = Some(dma_ref) }; + defmt::info!("audio init"); let mut audio = Audio { state: Atomic::new(AudioState::Stopped), i2s: i2s_peripheral, dac: dac_impl, - producer: QUEUE.stream_producer(), + dma: dma_ring(), fb: FeedbackState::default(), alt_setting: 0, nodata_timeout_frame: AtomicUsize::new(0), + cur_rate: SAMPLE_RATE, + clock_pins: clock_sel_pins, _marker: core::marker::PhantomData, }; audio.init(); let usb_bus = UsbBus::new(usb_peripheral, usb0_vbus_pin); - let mut clock = Clock { - pins: clock_sel_pins, - cur_rate: SAMPLE_RATE, - }; - let config = AudioClassConfig::new(UsbSpeed::High, FunctionCode::Other, &mut clock, &mut audio) - .with_output_config(TerminalConfig::new( - 2, - 1, - 2, - FormatType1 { - bit_resolution: 32, - bytes_per_sample: 4, - }, - TerminalType::OutHeadphones, - ChannelConfig::default_chans(2), - IsochronousSynchronizationType::Asynchronous, - LockDelay::Undefined(0), - None, - )); + + let config 
= UsbAudioClassConfig::new(UsbSpeed::High, FunctionCode::Other, &mut audio) + .with_output_config( + TerminalConfig::builder() + .base_id(2) + .terminal_type(TerminalType::UsbUndefined) + .build(), + ); let mut uac2 = config.build(&usb_bus).unwrap(); #[cfg(feature = "hid")] let mut hid = HIDClass::new_ep_in(&usb_bus, AudioTelemetryReport::desc(), HID_INTERVAL_MS); - let mut usb_dev = UsbDeviceBuilder::new(&usb_bus, UsbVidPid(0x1209, 0xcc1d)) - .composite_with_iads() + let mut usb_dev = usbd_uac2::builder(&usb_bus, UsbVidPid(0x1209, 0xcc1d)) .strings(&[StringDescriptors::default() .manufacturer("VE7XEN") - .product("Guac Tortilla") - .serial_number("123456789")]) + .product("Guac Tortilla")]) .unwrap() .max_packet_size_0(64) .unwrap() - .device_class(0xef) - .device_sub_class(0x02) - .device_protocol(0x01) .build(); #[cfg(feature = "hid")]