189 lines
5.8 KiB
Python
189 lines
5.8 KiB
Python
"""
|
|
音频预处理服务
|
|
|
|
使用 ffprobe/ffmpeg 对上传音频做统一探测和规范化,降低长会议音频的格式兼容风险。
|
|
当前阶段只做单文件预处理,不做拆片。
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
import json
|
|
import shutil
|
|
import subprocess
|
|
|
|
from app.utils.audio_parser import get_audio_duration
|
|
|
|
|
|
@dataclass
class AudioMetadata:
    """Technical properties of an audio file.

    Every field has a default, so an instance can be built from whatever
    subset of information is available; optional fields stay ``None`` when
    the value is unknown.
    """

    # Length of the audio in whole seconds (0 when unknown).
    duration_seconds: int = 0
    # Sample rate of the audio stream, if known.
    sample_rate: Optional[int] = None
    # Number of audio channels, if known.
    channels: Optional[int] = None
    # Codec name of the audio stream, if known.
    codec_name: Optional[str] = None
    # Container format name, if known.
    format_name: Optional[str] = None
    # Bit rate of the audio, if known.
    bit_rate: Optional[int] = None
|
|
|
|
|
|
@dataclass
class AudioPreprocessResult:
    """Outcome of one audio preprocessing run."""

    # Location of the file described by this result.
    file_path: Path
    # Base name of ``file_path``.
    file_name: str
    # Size of the file.
    file_size: int
    # Probed technical properties of the file.
    metadata: AudioMetadata
    # Whether a conversion was actually applied.
    applied: bool = False
    # Format label of the resulting file, or None when unknown.
    output_format: Optional[str] = None
|
|
|
|
|
|
class AudioPreprocessService:
    """ffmpeg-based audio preprocessing service.

    Probes audio files with ffprobe and converts them to one uniform format
    (AAC in an ``.m4a`` container, mono, 16 kHz, 64 kbit/s) with ffmpeg.
    Both tools are optional: when ffprobe is missing only the duration is
    reported, and when ffmpeg is missing the source file is returned as-is.
    """

    TARGET_EXTENSION = ".m4a"
    TARGET_SAMPLE_RATE = 16000
    TARGET_CHANNELS = 1
    TARGET_BITRATE = "64k"

    def __init__(self):
        """Locate the ffmpeg and ffprobe executables on PATH (None if absent)."""
        self.ffmpeg_path = shutil.which("ffmpeg")
        self.ffprobe_path = shutil.which("ffprobe")

    def probe_audio(self, file_path: str | Path) -> AudioMetadata:
        """Return metadata for an audio file.

        ffprobe is used when available; if it is missing or yields nothing
        usable, only the duration is filled in via ``get_audio_duration``.

        Args:
            file_path: Path of the file to inspect.

        Returns:
            The probed ``AudioMetadata``.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"音频文件不存在: {path}")

        if self.ffprobe_path:
            metadata = self._probe_with_ffprobe(path)
            if metadata is not None:
                return metadata

        return AudioMetadata(duration_seconds=get_audio_duration(str(path)))

    def preprocess(self, file_path: str | Path) -> AudioPreprocessResult:
        """Normalize an audio file into the uniform target format.

        Current strategy:
        1. Drop video streams, keep audio only.
        2. Downmix to mono.
        3. Resample to 16 kHz.
        4. Encode as AAC in an m4a container.

        The output is written next to the source as
        ``<stem>_normalized.m4a``. ffmpeg writes to a temporary sibling file
        that is renamed only after a successful run, so a failed conversion
        never leaves a partial output behind.

        Args:
            file_path: Path of the source file.

        Returns:
            An ``AudioPreprocessResult``. When ffmpeg is not installed the
            result describes the untouched source file with ``applied=False``.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            RuntimeError: If ffmpeg exits with a non-zero status; the message
                is ffmpeg's stderr when available.
        """
        source_path = Path(file_path)
        if not source_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {source_path}")

        if not self.ffmpeg_path:
            return self._build_result(source_path, applied=False)

        output_path = source_path.with_name(
            f"{source_path.stem}_normalized{self.TARGET_EXTENSION}"
        )
        # Keep the real extension last so ffmpeg still infers the container.
        temp_output_path = output_path.with_name(
            f"{output_path.stem}.tmp{output_path.suffix}"
        )

        try:
            completed = self._run_tool(
                self._build_ffmpeg_command(source_path, temp_output_path)
            )
            if completed.returncode != 0:
                stderr = (completed.stderr or "").strip()
                raise RuntimeError(stderr or "ffmpeg 预处理失败")

            temp_output_path.replace(output_path)
            return self._build_result(output_path, applied=True)
        finally:
            # After a successful rename the temp file is already gone; after a
            # failure it may or may not exist. Remove it without a racy
            # exists() pre-check.
            try:
                temp_output_path.unlink()
            except FileNotFoundError:
                pass

    def _build_result(self, path: Path, *, applied: bool) -> AudioPreprocessResult:
        """Probe ``path`` and describe it as a preprocessing result."""
        metadata = self.probe_audio(path)
        return AudioPreprocessResult(
            file_path=path,
            file_name=path.name,
            file_size=path.stat().st_size,
            metadata=metadata,
            applied=applied,
            output_format=path.suffix.lower().lstrip(".") or None,
        )

    def _build_ffmpeg_command(self, source_path: Path, target_path: Path) -> list:
        """Return the ffmpeg argument list converting ``source_path`` to ``target_path``."""
        return [
            self.ffmpeg_path,
            "-y",
            "-i",
            str(source_path),
            "-vn",
            "-ac",
            str(self.TARGET_CHANNELS),
            "-ar",
            str(self.TARGET_SAMPLE_RATE),
            "-c:a",
            "aac",
            "-b:a",
            self.TARGET_BITRATE,
            "-movflags",
            "+faststart",
            str(target_path),
        ]

    @staticmethod
    def _run_tool(command: list) -> subprocess.CompletedProcess:
        """Run an ffmpeg/ffprobe command and capture its output as text.

        Output is decoded as UTF-8 with replacement instead of the locale
        encoding: file names and tags in the tools' output may not be valid
        in the locale code page, and a strict decode error would abort an
        otherwise successful run. stdin is detached so ffmpeg cannot read
        from (or block on) the parent process's standard input.
        """
        return subprocess.run(
            command,
            check=False,
            capture_output=True,
            stdin=subprocess.DEVNULL,
            encoding="utf-8",
            errors="replace",
        )

    def _probe_with_ffprobe(self, file_path: Path) -> Optional[AudioMetadata]:
        """Probe ``file_path`` with ffprobe.

        Best-effort: returns ``None`` when ffprobe is unavailable, cannot be
        run, exits with an error, or prints something that is not a JSON
        object. Individual fields that cannot be parsed are left unset
        instead of discarding the whole result.
        """
        if not self.ffprobe_path:
            return None

        command = [
            self.ffprobe_path,
            "-v",
            "error",
            "-print_format",
            "json",
            "-show_streams",
            "-show_format",
            str(file_path),
        ]

        try:
            completed = self._run_tool(command)
        except (OSError, ValueError, subprocess.SubprocessError):
            return None

        if completed.returncode != 0 or not completed.stdout:
            return None

        try:
            payload = json.loads(completed.stdout)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return None

        fields = self._parse_probe_payload(payload)
        if fields is None:
            return None
        return AudioMetadata(**fields)

    @classmethod
    def _parse_probe_payload(cls, payload) -> Optional[dict]:
        """Extract ``AudioMetadata`` keyword arguments from ffprobe JSON.

        Uses the first stream whose ``codec_type`` is ``"audio"``; duration
        and bit rate fall back to the container-level ``format`` section when
        the stream does not provide a usable value. Returns ``None`` if
        ``payload`` is not a JSON object.
        """
        if not isinstance(payload, dict):
            return None

        streams = payload.get("streams")
        if not isinstance(streams, list):
            streams = []
        audio_stream = next(
            (
                stream
                for stream in streams
                if isinstance(stream, dict) and stream.get("codec_type") == "audio"
            ),
            {},
        )

        format_info = payload.get("format")
        if not isinstance(format_info, dict):
            format_info = {}

        duration = cls._first_int(
            audio_stream.get("duration"), format_info.get("duration")
        )
        bit_rate = cls._first_int(
            audio_stream.get("bit_rate"), format_info.get("bit_rate")
        )

        return {
            "duration_seconds": duration if duration is not None else 0,
            "sample_rate": cls._to_int(audio_stream.get("sample_rate")),
            "channels": cls._to_int(audio_stream.get("channels")),
            "codec_name": audio_stream.get("codec_name"),
            "format_name": format_info.get("format_name"),
            "bit_rate": bit_rate,
        }

    @classmethod
    def _first_int(cls, *candidates) -> Optional[int]:
        """Return the first candidate that parses as an integer, else None."""
        for candidate in candidates:
            parsed = cls._to_int(candidate)
            if parsed is not None:
                return parsed
        return None

    @staticmethod
    def _to_int(value) -> Optional[int]:
        """Convert a numeric value or string to int, truncating fractions.

        Returns ``None`` for empty/missing values and for anything that is
        not a finite number (for example ``"N/A"``).
        """
        if not value:
            return None
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return None
|
|
|
|
|
|
# Shared module-level instance. Constructing it looks up ffmpeg/ffprobe on
# PATH once, at the time this module is imported.
audio_preprocess_service = AudioPreprocessService()
|