"""Audio preprocessing service.

Uses ffprobe/ffmpeg to probe and normalize uploaded audio in a uniform way,
reducing format-compatibility risk for long meeting recordings.
At this stage only single-file preprocessing is done; no splitting into
segments.
"""

from __future__ import annotations

import json
import logging
import shutil
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

from app.utils.audio_parser import get_audio_duration

logger = logging.getLogger(__name__)


@dataclass
class AudioMetadata:
    """Audio metadata."""

    duration_seconds: int = 0
    sample_rate: Optional[int] = None
    channels: Optional[int] = None
    codec_name: Optional[str] = None
    format_name: Optional[str] = None
    bit_rate: Optional[int] = None


@dataclass
class AudioPreprocessResult:
    """Result of an audio preprocessing run."""

    file_path: Path
    file_name: str
    file_size: int
    metadata: AudioMetadata
    # True only when ffmpeg actually produced a normalized file.
    applied: bool = False
    output_format: Optional[str] = None


class AudioPreprocessService:
    """ffmpeg-based audio preprocessing service."""

    # Suffix of the normalized output file.
    TARGET_EXTENSION = ".m4a"
    # Passed to ffmpeg as ``-ar``.
    TARGET_SAMPLE_RATE = 16000
    # Passed to ffmpeg as ``-ac``.
    TARGET_CHANNELS = 1
    # Passed to ffmpeg as ``-b:a``.
    TARGET_BITRATE = "64k"

    def __init__(self):
        # Either path is None when the executable is not found on PATH.
        self.ffmpeg_path = shutil.which("ffmpeg")
        self.ffprobe_path = shutil.which("ffprobe")

    def probe_audio(self, file_path: str | Path) -> AudioMetadata:
        """Probe audio metadata, preferring ffprobe.

        When ffprobe is unavailable or its probe fails, only the duration is
        filled in, using the project helper ``get_audio_duration``.

        Args:
            file_path: Path of the audio file to inspect.

        Returns:
            The metadata that could be determined.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"音频文件不存在: {path}")

        if self.ffprobe_path:
            metadata = self._probe_with_ffprobe(path)
            if metadata is not None:
                return metadata

        return AudioMetadata(duration_seconds=get_audio_duration(str(path)))

    def preprocess(self, file_path: str | Path) -> AudioPreprocessResult:
        """Convert the audio into the unified target format.

        Current strategy:
        1. Drop video streams, keep audio only.
        2. Downmix to mono.
        3. Resample to 16 kHz.
        4. Encode as m4a (AAC).

        When ffmpeg is not available the source file is returned untouched
        with ``applied=False``.

        Args:
            file_path: Path of the source audio file.

        Returns:
            A result describing the normalized file (or the original file
            when no conversion was performed).

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            RuntimeError: If ffmpeg exits with a non-zero status; the message
                is ffmpeg's stderr when it is non-empty.
        """
        source_path = Path(file_path)
        if not source_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {source_path}")

        if not self.ffmpeg_path:
            metadata = self.probe_audio(source_path)
            return AudioPreprocessResult(
                file_path=source_path,
                file_name=source_path.name,
                file_size=source_path.stat().st_size,
                metadata=metadata,
                applied=False,
                output_format=source_path.suffix.lower().lstrip(".") or None,
            )

        output_path = source_path.with_name(
            f"{source_path.stem}_normalized{self.TARGET_EXTENSION}"
        )
        # ffmpeg writes to a temp name first so that a failed run never
        # leaves a partial file at the final output path.
        temp_output_path = output_path.with_name(
            f"{output_path.stem}.tmp{output_path.suffix}"
        )

        command = [
            self.ffmpeg_path,
            "-y",
            "-i",
            str(source_path),
            "-vn",
            "-ac",
            str(self.TARGET_CHANNELS),
            "-ar",
            str(self.TARGET_SAMPLE_RATE),
            "-c:a",
            "aac",
            "-b:a",
            self.TARGET_BITRATE,
            "-movflags",
            "+faststart",
            str(temp_output_path),
        ]

        try:
            completed = self._run(command)
            if completed.returncode != 0:
                stderr = (completed.stderr or "").strip()
                raise RuntimeError(stderr or "ffmpeg 预处理失败")

            temp_output_path.replace(output_path)
            metadata = self.probe_audio(output_path)
            return AudioPreprocessResult(
                file_path=output_path,
                file_name=output_path.name,
                file_size=output_path.stat().st_size,
                metadata=metadata,
                applied=True,
                output_format=output_path.suffix.lower().lstrip("."),
            )
        finally:
            # After a successful replace() the temp file is already gone;
            # on failure this removes whatever ffmpeg left behind.
            try:
                temp_output_path.unlink()
            except FileNotFoundError:
                pass

    @staticmethod
    def _run(command: list) -> subprocess.CompletedProcess:
        """Run an ffmpeg/ffprobe command and capture its output as text.

        Output is decoded as UTF-8 with undecodable bytes replaced, instead
        of relying on the locale encoding, so that non-ASCII tags or paths
        in the tool output cannot raise a decode error. stdin is detached so
        the child never reads from the parent's standard input.
        """
        return subprocess.run(
            command,
            check=False,
            stdin=subprocess.DEVNULL,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
        )

    @staticmethod
    def _to_int(value: object) -> Optional[int]:
        """Return ``value`` as an int, or None if it is empty or not numeric.

        Accepts ints, floats and numeric strings (fractional values are
        truncated); anything else, such as ``"N/A"``, yields None.
        """
        if not value:
            return None
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return None

    @classmethod
    def _metadata_from_payload(cls, payload: dict) -> AudioMetadata:
        """Build metadata from a decoded ffprobe JSON document.

        Uses the first stream whose ``codec_type`` is ``"audio"``. Duration
        and bit rate fall back to the container-level ``format`` section
        when the stream value is missing or unparsable.
        """
        streams = payload.get("streams") or []
        audio_stream = next(
            (stream for stream in streams if stream.get("codec_type") == "audio"),
            {},
        )
        format_info = payload.get("format") or {}

        duration_seconds = cls._to_int(audio_stream.get("duration"))
        if duration_seconds is None:
            duration_seconds = cls._to_int(format_info.get("duration"))

        bit_rate = cls._to_int(audio_stream.get("bit_rate"))
        if bit_rate is None:
            bit_rate = cls._to_int(format_info.get("bit_rate"))

        return AudioMetadata(
            duration_seconds=duration_seconds or 0,
            sample_rate=cls._to_int(audio_stream.get("sample_rate")),
            channels=cls._to_int(audio_stream.get("channels")),
            codec_name=audio_stream.get("codec_name"),
            format_name=format_info.get("format_name"),
            bit_rate=bit_rate,
        )

    def _probe_with_ffprobe(self, file_path: Path) -> Optional[AudioMetadata]:
        """Probe ``file_path`` with ffprobe.

        Returns:
            The parsed metadata, or None when ffprobe exits non-zero,
            prints nothing, or its output cannot be interpreted.
        """
        command = [
            self.ffprobe_path,
            "-v",
            "error",
            "-print_format",
            "json",
            "-show_streams",
            "-show_format",
            str(file_path),
        ]

        # Probing is best-effort: any failure makes the caller fall back,
        # but it is logged instead of being dropped silently.
        try:
            completed = self._run(command)
            if completed.returncode != 0 or not completed.stdout:
                return None
            payload = json.loads(completed.stdout)
            return self._metadata_from_payload(payload)
        except Exception:
            logger.warning("ffprobe probe failed for %s", file_path, exc_info=True)
            return None


audio_preprocess_service = AudioPreprocessService()