# File: imetting/backend/app/services/audio_preprocess_service.py (Python; listed by the file viewer as 189 lines, 5.8 KiB)

"""
音频预处理服务
使用 ffprobe/ffmpeg 对上传音频做统一探测和规范化,降低长会议音频的格式兼容风险。
当前阶段只做单文件预处理,不做拆片。
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import json
import shutil
import subprocess
from app.utils.audio_parser import get_audio_duration
@dataclass
class AudioMetadata:
    """Audio metadata describing a single audio file.

    Every field carries a default, so an instance can be created from
    whatever subset of values is available; optional fields stay ``None``
    when the value is unknown.
    """

    # Duration in whole seconds; 0 when no duration is available.
    duration_seconds: int = 0

    # Sample rate of the audio stream, if known.
    sample_rate: Optional[int] = None

    # Number of audio channels, if known.
    channels: Optional[int] = None

    # Name of the audio codec, if known.
    codec_name: Optional[str] = None

    # Name of the container format, if known.
    format_name: Optional[str] = None

    # Bit rate, if known.
    bit_rate: Optional[int] = None
@dataclass
class AudioPreprocessResult:
    """Outcome of one audio preprocessing run.

    Describes the file that should be used after preprocessing, which is
    either a newly written normalized file or the untouched source file.
    """

    # Location of the resulting file.
    file_path: Path

    # Base name of the resulting file.
    file_name: str

    # Size of the resulting file in bytes.
    file_size: int

    # Metadata probed from the resulting file.
    metadata: AudioMetadata

    # True when the file was actually re-encoded; False when it was passed through.
    applied: bool = False

    # File extension of the result without the leading dot, or None if it has none.
    output_format: Optional[str] = None
class AudioPreprocessService:
    """Audio preprocessing service built on ffmpeg/ffprobe.

    Probes audio files and re-encodes them into one uniform format (audio
    only, mono, 16 kHz, AAC in an ``.m4a`` container). Both external tools
    are optional: without ffprobe, probing falls back to
    ``get_audio_duration``; without ffmpeg, ``preprocess`` hands back the
    source file untouched.
    """

    TARGET_EXTENSION = ".m4a"
    TARGET_SAMPLE_RATE = 16000
    TARGET_CHANNELS = 1
    TARGET_BITRATE = "64k"

    def __init__(self):
        # Either path is None when the executable cannot be found.
        self.ffmpeg_path = shutil.which("ffmpeg")
        self.ffprobe_path = shutil.which("ffprobe")

    def probe_audio(self, file_path: str | Path) -> AudioMetadata:
        """Probe the metadata of an audio file.

        Uses ffprobe when it is available and yields usable output;
        otherwise only the duration is filled in via ``get_audio_duration``.

        Args:
            file_path: Path of the audio file to inspect.

        Returns:
            The probed metadata.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"音频文件不存在: {path}")

        if self.ffprobe_path:
            metadata = self._probe_with_ffprobe(path)
            if metadata:
                return metadata

        return AudioMetadata(duration_seconds=get_audio_duration(str(path)))

    def preprocess(self, file_path: str | Path) -> AudioPreprocessResult:
        """Normalize an audio file into the uniform target format.

        Current strategy:
            1. Drop video streams, keep audio only.
            2. Downmix to mono.
            3. Resample to 16 kHz.
            4. Encode as AAC in an m4a container.

        The output is written next to the source as
        ``<stem>_normalized.m4a``. ffmpeg writes to a temporary sibling
        file first, which is moved into place only on success.

        Args:
            file_path: Path of the source audio file.

        Returns:
            A result describing the normalized file, or the untouched source
            file (``applied=False``) when ffmpeg is not installed.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        source_path = Path(file_path)
        if not source_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {source_path}")

        if not self.ffmpeg_path:
            # No ffmpeg available: pass the source file through unchanged.
            return self._build_result(source_path, applied=False)

        output_path = source_path.with_name(
            f"{source_path.stem}_normalized{self.TARGET_EXTENSION}"
        )
        # Keep the real extension last so ffmpeg can still infer the muxer.
        temp_output_path = output_path.with_name(
            f"{output_path.stem}.tmp{output_path.suffix}"
        )
        command = [
            self.ffmpeg_path,
            "-y",
            "-i",
            str(source_path),
            "-vn",
            "-ac",
            str(self.TARGET_CHANNELS),
            "-ar",
            str(self.TARGET_SAMPLE_RATE),
            "-c:a",
            "aac",
            "-b:a",
            self.TARGET_BITRATE,
            "-movflags",
            "+faststart",
            str(temp_output_path),
        ]

        try:
            completed = self._run_tool(command)
            if completed.returncode != 0:
                stderr = (completed.stderr or "").strip()
                raise RuntimeError(stderr or "ffmpeg 预处理失败")

            temp_output_path.replace(output_path)
            return self._build_result(output_path, applied=True)
        finally:
            # Never leave a partial temp file behind, whatever went wrong.
            if temp_output_path.exists():
                temp_output_path.unlink()

    def _build_result(self, path: Path, applied: bool) -> AudioPreprocessResult:
        """Probe ``path`` and wrap it in an ``AudioPreprocessResult``."""
        metadata = self.probe_audio(path)
        return AudioPreprocessResult(
            file_path=path,
            file_name=path.name,
            file_size=path.stat().st_size,
            metadata=metadata,
            applied=applied,
            output_format=path.suffix.lower().lstrip(".") or None,
        )

    @staticmethod
    def _run_tool(command: list[str]) -> subprocess.CompletedProcess:
        """Run an ffmpeg/ffprobe command and capture its output as text.

        Output is decoded as UTF-8 with undecodable bytes replaced: the
        tools echo file names and tags verbatim, and a strict decode could
        raise ``UnicodeDecodeError`` even though the command succeeded.
        stdin is detached so the child never reads from (or blocks on) the
        parent's standard input.
        """
        return subprocess.run(
            command,
            check=False,
            capture_output=True,
            stdin=subprocess.DEVNULL,
            text=True,
            encoding="utf-8",
            errors="replace",
        )

    @staticmethod
    def _to_int(value: object) -> Optional[int]:
        """Convert a probed value to ``int``; return ``None`` if impossible.

        Falsy values (``None``, ``""``, ``0``) map to ``None``. Strings such
        as ``"44100"`` or ``"12.7"`` are accepted (the latter truncates);
        anything unparseable such as ``"N/A"`` yields ``None``.
        """
        if not value:
            return None
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            pass
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return None

    def _probe_with_ffprobe(self, file_path: Path) -> Optional[AudioMetadata]:
        """Read metadata through ffprobe's JSON output.

        Best-effort: returns ``None`` whenever ffprobe cannot be run, exits
        with an error, or prints something unusable, so the caller can fall
        back to another probing method.

        Args:
            file_path: Path of the audio file to inspect.

        Returns:
            The parsed metadata, or ``None`` on any failure.
        """
        if not self.ffprobe_path:
            return None

        command = [
            self.ffprobe_path,
            "-v",
            "error",
            "-print_format",
            "json",
            "-show_streams",
            "-show_format",
            str(file_path),
        ]
        try:
            completed = self._run_tool(command)
            if completed.returncode != 0 or not completed.stdout:
                return None
            payload = json.loads(completed.stdout)
        except (OSError, subprocess.SubprocessError, ValueError):
            # Launch failure or invalid JSON (JSONDecodeError is a ValueError).
            return None

        if not isinstance(payload, dict):
            return None

        raw_streams = payload.get("streams")
        streams = raw_streams if isinstance(raw_streams, list) else []
        # The first audio stream describes the file; default to an empty dict.
        audio_stream = next(
            (
                stream
                for stream in streams
                if isinstance(stream, dict) and stream.get("codec_type") == "audio"
            ),
            {},
        )
        raw_format = payload.get("format")
        format_info = raw_format if isinstance(raw_format, dict) else {}

        # Stream-level values win; container-level values are the fallback.
        duration_value = audio_stream.get("duration") or format_info.get("duration")
        duration_seconds = 0
        if duration_value:
            parsed_duration = self._to_int(duration_value)
            if parsed_duration is None:
                # Unusable duration: report failure so the caller falls back.
                return None
            duration_seconds = parsed_duration

        bit_rate_value = audio_stream.get("bit_rate") or format_info.get("bit_rate")

        # A malformed optional field degrades to None without discarding the rest.
        return AudioMetadata(
            duration_seconds=duration_seconds,
            sample_rate=self._to_int(audio_stream.get("sample_rate")),
            channels=self._to_int(audio_stream.get("channels")),
            codec_name=audio_stream.get("codec_name"),
            format_name=format_info.get("format_name"),
            bit_rate=self._to_int(bit_rate_value),
        )
# Shared module-level instance. Constructing it looks up the ffmpeg and
# ffprobe executables via shutil.which, so the lookup happens at import time.
audio_preprocess_service = AudioPreprocessService()