From 43f8d38cb2be72c6079b7008922ec9f06f0c4878 Mon Sep 17 00:00:00 2001 From: Nell Date: Tue, 22 Jul 2025 00:22:39 +0200 Subject: [PATCH] pre-passage f32 --- src-tauri/src/app/ox_speak_app.rs | 7 +- src-tauri/src/core/capture.rs | 41 ++-- src-tauri/src/core/mixer.rs | 13 +- src-tauri/src/core/opus.rs | 46 ++++- src-tauri/src/core/playback.rs | 6 +- src-tauri/src/domain/audio_client.rs | 24 +-- src-tauri/src/domain/event.rs | 2 +- src-tauri/src/utils/audio_utils.rs | 271 ++++++++++++++++++++++++++- 8 files changed, 346 insertions(+), 64 deletions(-) diff --git a/src-tauri/src/app/ox_speak_app.rs b/src-tauri/src/app/ox_speak_app.rs index 97612b3..92b9eed 100644 --- a/src-tauri/src/app/ox_speak_app.rs +++ b/src-tauri/src/app/ox_speak_app.rs @@ -5,7 +5,7 @@ use tokio; use tokio::sync::mpsc; use crate::core::capture::AudioCapture; use crate::core::mixer::AudioMixer; -use crate::core::playback::AudioPlayback; +use crate::core::playback::{AudioPlayback, Speaker}; use crate::domain::audio_client::AudioClientManager; use crate::domain::event::{Event, EventBus}; use crate::network::udp::UdpSession; @@ -46,7 +46,10 @@ impl OxSpeakApp { let audio_capture = AudioCapture::default(event_bus.clone()); println!("Initializing audio client"); let audio_client_manager = AudioClientManager::new(); - let audio_mixer = AudioMixer::new(audio_client_manager.clone()); + + // todo : pas idéal (la récup du sample_rate), car le mieux serais de récupérer ça dynamiquement. Peut être charger le mixer depuis audio_playback ? + let stream_config = Speaker::default().get_stream_config(); + let audio_mixer = AudioMixer::new(stream_config.sample_rate.0 as usize, stream_config.channels as usize, audio_client_manager.clone()); let audio_playback = AudioPlayback::default(event_bus.clone(), audio_mixer.clone()); // UdpSession diff --git a/src-tauri/src/core/capture.rs b/src-tauri/src/core/capture.rs index c2a0fdc..33ab25e 100644 --- a/src-tauri/src/core/capture.rs +++ b/src-tauri/src/core/capture.rs @@ -7,6 +7,7 @@ use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; // ✅ Supprimé rubato complètement ! use crate::core::opus::AudioOpus; use crate::domain::event::{Event, EventBus}; +use crate::utils::audio_utils::AudioTools; use crate::utils::ringbuf::RingBuffer; #[derive(Clone)] @@ -18,7 +19,7 @@ pub struct AudioCapture { event_bus: EventBus, microphone: Microphone, running: Arc, - ring_buffer: RingBuffer, + ring_buffer: RingBuffer, steam: Option, worker: Option>, } @@ -41,25 +42,12 @@ impl Microphone { } pub fn get_stream_config(&self) -> StreamConfig { - let supported_channels = self.get_supported_channels(); - - // Priorité : mono si supporté, sinon prend le premier disponible - let channels = if supported_channels.contains(&1) { - 1 // ✅ Mono préféré - } else { - supported_channels.first().copied().unwrap_or(2) // Fallback - }; - - StreamConfig { - channels, - sample_rate: SampleRate(48_000), // ✅ Force 48kHz - buffer_size: BufferSize::Default, - } + self.get_input_config().into() } pub fn build_stream(&self, callback: F) -> Stream where - F: FnMut(&[i16], &cpal::InputCallbackInfo) + Send + 'static, + F: FnMut(&[f32], &cpal::InputCallbackInfo) + Send + 'static, { let config = self.get_stream_config(); self.device.build_input_stream( @@ -138,11 +126,12 @@ impl AudioCapture { let worker_running = self.running.clone(); let event_bus = self.event_bus.clone(); let stream_config = self.microphone.get_stream_config(); + let sample_rate: usize = stream_config.sample_rate.0 as usize; + let channels: usize = stream_config.channels as usize; println!("Audio config: {} channels @ {}Hz", stream_config.channels, stream_config.sample_rate.0); - // ✅ Simple : on assume 48kHz partout ! let opus = AudioOpus::new(48_000, 1, "voip"); let mut encoder = opus.create_encoder().unwrap(); let reader = self.ring_buffer.reader(); @@ -151,8 +140,8 @@ impl AudioCapture { self.worker = Some(thread::spawn(move || { println!("Audio processing thread started"); - let frame_size = 48_000 * 10 / 1000; // ✅ 10ms = 480 samples @ 48kHz - let mut raw_buffer = vec![0i16; frame_size]; + let frame_size = sample_rate * 10 / 1000; // ✅ 10ms = 480 samples @ 48kHz / 441 @ 44.1khz + let mut raw_buffer = vec![0f32; frame_size]; while worker_running.load(Ordering::Relaxed) { let _read_count = reader.pop_slice_blocking(&mut raw_buffer); @@ -161,16 +150,10 @@ impl AudioCapture { break; } - // ✅ Processing ultra-simple - let processed_audio = Self::process_audio_frame( - &stream_config, - &raw_buffer - ); + let sample = AudioTools::sample_to_mono(&raw_buffer, channels); + // todo : voir si il est nécessaire d'intégrer un resampling avec AudioResampler - // Events - event_bus.emit_sync(Event::AudioIn(processed_audio.clone())); - - match encoder.encode(&processed_audio) { + match encoder.encode(&sample) { Ok(encoded_data) => { event_bus.emit_sync(Event::AudioEncoded(encoded_data)) } @@ -178,6 +161,8 @@ impl AudioCapture { println!("Error encoding: {e}"); } } + // Events + event_bus.emit_sync(Event::AudioIn(sample)); } })); } diff --git a/src-tauri/src/core/mixer.rs b/src-tauri/src/core/mixer.rs index 389c40c..31e344f 100644 --- a/src-tauri/src/core/mixer.rs +++ b/src-tauri/src/core/mixer.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use arc_swap::ArcSwap; +use cpal::SampleRate; use crate::domain::audio_client::AudioClientManager; use crate::utils::ringbuf::{RingBufReader, RingBufWriter, RingBuffer}; @@ -9,11 +10,13 @@ use crate::utils::ringbuf::{RingBufReader, RingBufWriter, RingBuffer}; pub struct AudioMixer { audio_client_manager: AudioClientManager, buffer_writer: Arc>, - buffer_reader: Arc> + buffer_reader: Arc>, + sample_rate: SampleRate, + channels: usize, } impl AudioMixer { - pub fn new(audio_client_manager: AudioClientManager) -> Self { + pub fn new(sample_rate: usize, channels: usize, audio_client_manager: AudioClientManager) -> Self { let (buffer_writer, buffer_reader) = RingBuffer::new(2048).split(); Self { audio_client_manager, @@ -22,12 +25,12 @@ impl AudioMixer { } } pub fn mix_next_frame(&self, size: usize) { - let mut frames = Vec::>::new(); - // Récupérer les buffers audio des utilisateurs, par défaut, ils sont en mono, donc size / 2 - // convertir en stéréo, donc size * 2 frames + let mut frames = Vec::>::new(); let users_audio = self.audio_client_manager.take_audio_collection(size/2).into_iter() .map(|audio| AudioMixer::mono_to_stereo(audio)) .collect::>>(); + + frames.extend_from_slice(&users_audio); // Récupérer tous les sons des notifications (pas encore dev) diff --git a/src-tauri/src/core/opus.rs b/src-tauri/src/core/opus.rs index 6452a9c..d938038 100644 --- a/src-tauri/src/core/opus.rs +++ b/src-tauri/src/core/opus.rs @@ -1,4 +1,5 @@ use opus::{Application, Channels, Decoder, Encoder}; +use crate::utils::audio_utils::AudioSample; #[derive(Clone)] pub struct AudioOpus{ @@ -65,22 +66,31 @@ impl AudioOpusEncoder { Ok(Self{audio_opus, encoder}) } - pub fn encode(&mut self, frames: &[i16]) -> Result, String> { - let mut output = vec![0u8; 1276]; // 1276 octets (la vraie worst-case recommandée par Opus). - let len = self.encoder.encode(frames, output.as_mut_slice()) + // Méthode générique qui accepte i16 ou f32 + pub fn encode(&mut self, frames: &[T]) -> Result, String> { + // Convertir tous les échantillons vers i16 + let i16_frames: Vec = frames.iter().map(|sample| sample.to_i16()).collect(); + + // Utiliser la logique d'encodage existante + let mut output = vec![0u8; 1276]; + let len = self.encoder.encode(&i16_frames, output.as_mut_slice()) .map_err(|e| format!("Erreur encodage: {:?}", e))?; output.truncate(len); Ok(output) } - // 🔄 Approche avec buffer réutilisable (encore plus optimal) - fn encode_reuse(&mut self, frames: &[i16], output: &mut Vec) -> Result { + // Version avec buffer réutilisable + pub fn encode_into_slice(&mut self, frames: &[T], output: &mut Vec) -> Result { + let i16_frames: Vec = frames.iter().map(|sample| sample.to_i16()).collect(); + output.clear(); output.resize(1276, 0); - let len = self.encoder.encode(frames, output.as_mut_slice()).unwrap(); + let len = self.encoder.encode(&i16_frames, output.as_mut_slice()) + .map_err(|e| format!("Erreur encodage: {:?}", e))?; output.truncate(len); Ok(len) } + } pub struct AudioOpusDecoder{ @@ -101,10 +111,26 @@ impl AudioOpusDecoder { Ok(Self{audio_opus, decoder}) } - pub fn decode(&mut self, frames: &[u8]) -> Result, String> { - let mut output = vec![0i16; 5760]; - let len = self.decoder.decode(frames, output.as_mut_slice(), false).map_err(|e| format!("Erreur décodage: {:?}", e))?; - output.truncate(len); + pub fn decode(&mut self, frames: &[u8]) -> Result, String> { + let mut i16_output = vec![0i16; 5760]; + let len = self.decoder.decode(frames, i16_output.as_mut_slice(), false) + .map_err(|e| format!("Erreur décodage: {:?}", e))?; + i16_output.truncate(len); + + let output: Vec = i16_output.iter().map(|&sample| T::from_i16(sample)).collect(); Ok(output) } + + // Décodage avec buffer réutilisable + pub fn decode_into_slice(&mut self, frames: &[u8], output: &mut Vec) -> Result { + let mut i16_buffer = vec![0i16; 5760]; + let len = self.decoder.decode(frames, i16_buffer.as_mut_slice(), false) + .map_err(|e| format!("Erreur décodage: {:?}", e))?; + i16_buffer.truncate(len); + + output.clear(); + output.extend(i16_buffer.iter().map(|&sample| T::from_i16(sample))); + + Ok(len) + } } \ No newline at end of file diff --git a/src-tauri/src/core/playback.rs b/src-tauri/src/core/playback.rs index 8d39a48..3a47f1a 100644 --- a/src-tauri/src/core/playback.rs +++ b/src-tauri/src/core/playback.rs @@ -43,11 +43,7 @@ impl Speaker { // Lister toutes les configurations supportées self.print_supported_configs(); - StreamConfig { - channels: 2, - sample_rate: SampleRate(44100), - buffer_size: BufferSize::Default - } + self.get_output_config().into() } pub fn print_supported_configs(&self) { diff --git a/src-tauri/src/domain/audio_client.rs b/src-tauri/src/domain/audio_client.rs index 92aada2..a79b9be 100644 --- a/src-tauri/src/domain/audio_client.rs +++ b/src-tauri/src/domain/audio_client.rs @@ -11,8 +11,8 @@ use crate::utils::shared_store::SharedArcMap; pub struct AudioClient { uuid: uuid::Uuid, decode_sender: mpsc::Sender, - buffer_reader: RingBufReader, - buffer_writer: RingBufWriter + buffer_reader: RingBufReader, + buffer_writer: RingBufWriter } struct DecodeRequest { @@ -27,7 +27,7 @@ pub struct AudioClientManager { impl AudioClient { pub fn new() -> Self { - let (writer, reader) = RingBuffer::::new(4096).split(); + let (writer, reader) = RingBuffer::::new(4096).split(); let (decode_sender, mut decode_reader) = mpsc::channel::(100); let writer_clone = writer.clone(); @@ -39,13 +39,13 @@ impl AudioClient { while let Some(request) = decode_reader.recv().await { // si la séquence est "trop vieille" on la drop. (voir plus tard pour un système de rattrapage si c'est possible) if last_sequence < request.sequence { - // todo : si le décodage est trop long, voir pour le mettre dans un thread - // avec let result = tokio::task::spawn_blocking({ - // let data = request.data.clone(); - // move || decoder.decode(&data) - // }).await.unwrap(); + // todo : si le décodage est trop long, voir pour le mettre dans un thread avec + // let result = tokio::task::spawn_blocking({ + // let data = request.data.clone(); + // move || decoder.decode(&data) + // }).await.unwrap(); let start = std::time::Instant::now(); - let result = decoder.decode(&request.data); + let result = decoder.decode::(&request.data); if start.elapsed() > Duration::from_millis(1) { println!("⚠️ Frame drop possible: {:?}", start.elapsed()); } @@ -78,12 +78,12 @@ impl AudioClient { }); } - pub fn read_audio(&self, size: usize) -> Option> { + pub fn read_audio(&self, size: usize) -> Option> { if self.buffer_reader.len() < size { return None; } - let mut buffer = vec![0i16; size]; + let mut buffer = vec![0f32; size]; let read_count = self.buffer_reader.pop_slice(&mut buffer); if read_count == size { @@ -122,7 +122,7 @@ impl AudioClientManager { let _ = self.audio_clients.get(&uuid).unwrap().write_audio(sequence, data); } - pub fn take_audio_collection(&self, size: usize) -> Vec> { + pub fn take_audio_collection(&self, size: usize) -> Vec> { let mut buffers = Vec::new(); for client in self.audio_clients.values() { diff --git a/src-tauri/src/domain/event.rs b/src-tauri/src/domain/event.rs index d5ba80b..ffcbd18 100644 --- a/src-tauri/src/domain/event.rs +++ b/src-tauri/src/domain/event.rs @@ -5,7 +5,7 @@ pub enum Event { AppStarted, AppStopped, - AudioIn(Vec), + AudioIn(Vec), AudioEncoded(Vec), PlaybackTick(usize), diff --git a/src-tauri/src/utils/audio_utils.rs b/src-tauri/src/utils/audio_utils.rs index f37b78b..0d29aef 100644 --- a/src-tauri/src/utils/audio_utils.rs +++ b/src-tauri/src/utils/audio_utils.rs @@ -1,7 +1,71 @@ +use std::fmt::Debug; use rubato::{Resampler, SincFixedIn, SincInterpolationType, SincInterpolationParameters, WindowFunction}; use parking_lot::Mutex; use std::sync::Arc; +/// Trait pour les échantillons audio avec conversion automatique +pub trait AudioSample: Copy + Clone + Debug + Send + Sync + 'static { + fn to_i16(&self) -> i16; + fn to_f32(&self) -> f32; + fn from_i16(value: i16) -> Self; + fn from_f32(value: f32) -> Self; + fn zero() -> Self; + fn clamp_audio(&self) -> Self; +} + +impl AudioSample for i16 { + fn to_i16(&self) -> i16 { + *self + } + + fn to_f32(&self) -> f32 { + *self as f32 / i16::MAX as f32 + } + + fn from_i16(value: i16) -> Self { + value + } + + fn from_f32(value: f32) -> Self { + (value.clamp(-1.0, 1.0) * i16::MAX as f32) as i16 + } + + fn zero() -> Self { + 0i16 + } + + fn clamp_audio(&self) -> Self { + *self // i16 est déjà dans sa plage valide + } +} + +impl AudioSample for f32 { + fn to_i16(&self) -> i16 { + (self.clamp(-1.0, 1.0) * i16::MAX as f32) as i16 + } + + fn to_f32(&self) -> f32 { + *self + } + + fn from_i16(value: i16) -> Self { + value as f32 / i16::MAX as f32 + } + + fn from_f32(value: f32) -> Self { + value + } + + fn zero() -> Self { + 0.0f32 + } + + fn clamp_audio(&self) -> Self { + self.clamp(-1.0, 1.0) + } +} + + /// Resampler audio optimisé avec rubato pour temps réel #[derive(Clone)] pub struct AudioResampler { @@ -192,4 +256,209 @@ impl AudioResampler { let resampler = AudioResampler::new(); Ok(resampler.resample(input, from_rate, to_rate, channels)) } -} \ No newline at end of file +} + +pub struct AudioTools; + +impl AudioTools { + /// Convertit un échantillon audio multi-canaux en mono (générique) + /// + /// # Arguments + /// * `samples` - Les échantillons audio (format interleaved) + /// * `channels` - Le nombre de canaux d'entrée + /// + /// # Returns + /// Un Vec contenant les échantillons mono + /// + /// # Example + /// ```ignore + /// // Avec i16 + /// + /// let stereo_i16 = vec![100i16, 200i16, 150i16, 250i16]; + /// let mono_i16 = AudioTools::sample_to_mono(&stereo_i16, 2); + /// + /// // Avec f32 + /// let stereo_f32 = vec![0.5f32, -0.3f32, 0.2f32, 0.8f32]; + /// let mono_f32 = AudioTools::sample_to_mono(&stereo_f32, 2); + /// ``` + pub fn sample_to_mono(samples: &[T], channels: usize) -> Vec { + // Si déjà mono, retourne une copie + if channels <= 1 { + return samples.to_vec(); + } + + // Calcule le nombre de frames + let frame_count = samples.len() / channels; + let mut mono_samples = Vec::with_capacity(frame_count); + + // Pour chaque frame, calcule la moyenne des canaux + for frame_start in (0..samples.len()).step_by(channels) { + let frame_end = (frame_start + channels).min(samples.len()); + let frame = &samples[frame_start..frame_end]; + + // Calcule la moyenne en utilisant f32 comme format intermédiaire + let sum: f32 = frame.iter().map(|sample| sample.to_f32()).sum(); + let average = sum / channels as f32; + + // Convertit de retour au format original et clamp + let mono_sample = T::from_f32(average).clamp_audio(); + mono_samples.push(mono_sample); + } + + mono_samples + } + + /// Version avec stratégies de mixage (générique) + pub fn sample_to_mono_with_strategy( + samples: &[T], + channels: usize, + strategy: MonoMixStrategy + ) -> Vec { + if channels <= 1 { + return samples.to_vec(); + } + + let frame_count = samples.len() / channels; + let mut mono_samples = Vec::with_capacity(frame_count); + + for frame_start in (0..samples.len()).step_by(channels) { + let frame_end = (frame_start + channels).min(samples.len()); + let frame = &samples[frame_start..frame_end]; + + let mono_sample = match strategy { + MonoMixStrategy::Average => { + let sum: f32 = frame.iter().map(|s| s.to_f32()).sum(); + let average = sum / channels as f32; + T::from_f32(average).clamp_audio() + } + MonoMixStrategy::LeftChannel => frame[0], + MonoMixStrategy::RightChannel => { + if channels > 1 { frame[1] } else { frame[0] } + } + MonoMixStrategy::Max => { + let max_sample = frame.iter() + .max_by(|a, b| { + a.to_f32().abs().partial_cmp(&b.to_f32().abs()) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .unwrap_or(&frame[0]); + *max_sample + } + MonoMixStrategy::Rms => { + // Root Mean Square + let sum_squares: f32 = frame.iter() + .map(|s| { + let val = s.to_f32(); + val * val + }) + .sum(); + let rms = (sum_squares / channels as f32).sqrt(); + + // Préserve le signe général + let sum_sign: f32 = frame.iter().map(|s| s.to_f32()).sum(); + let final_value = if sum_sign >= 0.0 { rms } else { -rms }; + + T::from_f32(final_value).clamp_audio() + } + }; + + mono_samples.push(mono_sample); + } + + mono_samples + } + + /// Conversion mono vers stéréo (générique) + pub fn mono_to_stereo(mono_samples: &[T]) -> Vec { + let mut stereo_data = Vec::with_capacity(mono_samples.len() * 2); + + for &sample in mono_samples { + stereo_data.push(sample); // Canal gauche + stereo_data.push(sample); // Canal droit + } + + stereo_data + } + + /// Conversion entre formats d'échantillons + pub fn convert_format(samples: &[From]) -> Vec { + samples.iter() + .map(|&sample| To::from_f32(sample.to_f32())) + .collect() + } + + /// Mix plusieurs frames audio ensemble (générique) + pub fn mix_frames(frames: &[Vec], target_size: usize) -> Vec { + if frames.is_empty() { + return vec![T::zero(); target_size]; + } + + let mut mixed = vec![0.0f32; target_size]; + + // Mix tous les frames en f32 pour éviter les débordements + for frame in frames { + for (i, &sample) in frame.iter().enumerate() { + if i < target_size { + mixed[i] += sample.to_f32(); + } + } + } + + let count = frames.len().max(1) as f32; + + // Convertit de retour au format cible avec normalisation + mixed.into_iter() + .map(|sample| T::from_f32(sample / count).clamp_audio()) + .collect() + } + + /// Utilitaire pour changer le nombre de canaux + pub fn change_channel_count( + samples: &[T], + from_channels: usize, + to_channels: usize, + ) -> Vec { + match (from_channels, to_channels) { + (1, 2) => Self::mono_to_stereo(samples), + (2, 1) => Self::sample_to_mono(samples, 2), + (from, to) if from == to => samples.to_vec(), + (from, 1) => Self::sample_to_mono(samples, from), + (1, to) => { + // Mono vers multi-canaux : duplique sur tous les canaux + let mut result = Vec::with_capacity(samples.len() * to); + for &sample in samples { + for _ in 0..to { + result.push(sample); + } + } + result + } + (from, to) => { + // Cas complexe : passe par mono puis étend + let mono = Self::sample_to_mono(samples, from); + Self::change_channel_count(&mono, 1, to) + } + } + } +} + +/// Stratégies de conversion multi-canaux vers mono +#[derive(Debug, Clone, Copy)] +pub enum MonoMixStrategy { + /// Moyenne de tous les canaux (par défaut) + Average, + /// Utilise seulement le canal gauche + LeftChannel, + /// Utilise seulement le canal droit + RightChannel, + /// Prend l'échantillon avec l'amplitude maximale + Max, + /// Root Mean Square - plus précis énergétiquement + Rms, +} + +impl Default for MonoMixStrategy { + fn default() -> Self { + Self::Average + } +}