codex/dev
mula.liu 2026-04-09 17:51:34 +08:00
parent 41f71e649d
commit 3fe28934cc
18 changed files with 590 additions and 260 deletions

View File

@ -52,6 +52,9 @@ vim .env # 配置七牛云、LLM密钥等
- ✅ 启动所有服务
- ✅ 等待健康检查
说明:
- 后端镜像现在依赖系统级 `ffmpeg/ffprobe` 做音频预处理,已在 `backend/Dockerfile` 中安装,无需宿主机额外安装。
### 方式二:手动启动
```bash
@ -119,6 +122,11 @@ HTTPS_PORT=443
# HTTP_PORT=80
```
### 音频预处理依赖
- Docker 部署:后端容器内已安装 `ffmpeg`
- 非 Docker 部署:请确保服务器可执行 `ffmpeg` 与 `ffprobe`
## 📦 数据目录
所有数据存储在 `./data/` 目录:

View File

@ -19,6 +19,7 @@ COPY requirements.txt .
RUN apt-get update && apt-get install -y \
gcc \
curl \
ffmpeg \
default-libmysqlclient-dev \
pkg-config \
&& pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements.txt \

View File

@ -44,19 +44,9 @@ class AudioModelUpsertRequest(BaseModel):
provider: str | None = None
endpoint_url: str | None = None
api_key: str | None = None
request_timeout_seconds: int = 300
extra_config: dict[str, Any] | None = None
asr_model_name: str | None = None
asr_vocabulary_id: str | None = None
hot_word_group_id: int | None = None
asr_speaker_count: int | None = None
asr_language_hints: str | None = None
asr_disfluency_removal_enabled: bool | None = None
asr_diarization_enabled: bool | None = None
vp_template_text: str | None = None
vp_duration_seconds: int | None = None
vp_sample_rate: int | None = None
vp_channels: int | None = None
vp_max_size_bytes: int | None = None
description: str | None = None
is_active: bool = True
is_default: bool = False

View File

@ -5,8 +5,8 @@ from app.core.auth import get_current_user
from app.core.response import create_api_response
from app.services.async_transcription_service import AsyncTranscriptionService
from app.services.async_meeting_service import async_meeting_service
from app.services.audio_preprocess_service import audio_preprocess_service
from app.services.audio_service import handle_audio_upload
from app.utils.audio_parser import get_audio_duration
from pydantic import BaseModel
from typing import Optional, List
from datetime import datetime, timedelta
@ -456,18 +456,30 @@ async def complete_upload(
}
)
# 6. 获取文件信息
# 6. 对合并后的音频执行统一预处理
full_path = BASE_DIR / file_path.lstrip('/')
file_size = full_path.stat().st_size
file_name = full_path.name
# 6.5 获取音频时长
audio_duration = 0
try:
audio_duration = get_audio_duration(str(full_path))
print(f"音频时长: {audio_duration}")
preprocess_result = audio_preprocess_service.preprocess(full_path)
processed_full_path = preprocess_result.file_path
file_size = preprocess_result.file_size
file_name = preprocess_result.file_name
audio_duration = preprocess_result.metadata.duration_seconds
file_path = f"/{processed_full_path.relative_to(BASE_DIR)}"
print(
f"流式上传音频预处理完成: source={full_path.name}, "
f"target={processed_full_path.name}, duration={audio_duration}s, "
f"applied={preprocess_result.applied}"
)
except Exception as e:
print(f"警告: 获取音频时长失败,但不影响后续流程: {e}")
if full_path.exists():
try:
os.remove(full_path)
except OSError:
pass
return create_api_response(
code="500",
message=f"音频预处理失败: {str(e)}"
)
# 7. 调用 audio_service 处理文件(数据库更新、启动转录和总结)
result = handle_audio_upload(
@ -484,8 +496,24 @@ async def complete_upload(
# 如果处理失败,返回错误
if not result["success"]:
cleanup_paths = [processed_full_path]
if processed_full_path != full_path:
cleanup_paths.append(full_path)
for cleanup_path in cleanup_paths:
if cleanup_path.exists():
try:
os.remove(cleanup_path)
except OSError:
pass
return result["response"]
if preprocess_result.applied and processed_full_path != full_path and full_path.exists():
try:
os.remove(full_path)
except OSError:
pass
# 8. 返回成功响应
transcription_task_id = result["transcription_task_id"]
message_suffix = ""

View File

@ -117,7 +117,6 @@ async def delete_group(id: int, current_user: dict = Depends(get_current_admin_u
"""
UPDATE audio_model_config
SET hot_word_group_id = NULL,
asr_vocabulary_id = NULL,
extra_config = JSON_REMOVE(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id')
WHERE hot_word_group_id = %s
""",
@ -190,15 +189,14 @@ async def sync_group(id: int, current_user: dict = Depends(get_current_admin_use
(vocab_id, id),
)
# 更新关联该组的所有 audio_model_config.asr_vocabulary_id
# 更新关联该组的所有 audio_model_config.extra_config.vocabulary_id
cursor.execute(
"""
UPDATE audio_model_config
SET asr_vocabulary_id = %s,
extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s)
SET extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s)
WHERE hot_word_group_id = %s
""",
(vocab_id, vocab_id, id),
(vocab_id, id),
)
conn.commit()

View File

@ -76,6 +76,7 @@ class Meeting(BaseModel):
description: Optional[str] = None
creator_id: int
creator_username: str
creator_account: Optional[str] = None
created_at: datetime.datetime
attendees: List[AttendeeInfo]
attendee_ids: Optional[List[int]] = None

View File

@ -92,24 +92,12 @@ def _merge_audio_extra_config(request, vocabulary_id: str | None = None) -> dict
extra_config = _parse_json_object(request.extra_config)
if request.audio_scene == "asr":
legacy_config = {
"model": request.asr_model_name,
"speaker_count": request.asr_speaker_count,
"language_hints": request.asr_language_hints,
"disfluency_removal_enabled": request.asr_disfluency_removal_enabled,
"diarization_enabled": request.asr_diarization_enabled,
}
if vocabulary_id:
extra_config["vocabulary_id"] = vocabulary_id
else:
legacy_config = {
"model": request.model_name,
"template_text": request.vp_template_text,
"duration_seconds": request.vp_duration_seconds,
"sample_rate": request.vp_sample_rate,
"channels": request.vp_channels,
"max_size_bytes": request.vp_max_size_bytes,
}
extra_config.pop("vocabulary_id", None)
merged = {**legacy_config, **extra_config}
merged = dict(extra_config)
language_hints = _normalize_string_list(merged.get("language_hints"))
if language_hints is not None:
@ -119,94 +107,20 @@ def _merge_audio_extra_config(request, vocabulary_id: str | None = None) -> dict
if channel_id is not None:
merged["channel_id"] = channel_id
resolved_vocabulary_id = vocabulary_id or merged.get("vocabulary_id") or request.asr_vocabulary_id
if request.audio_scene == "asr" and resolved_vocabulary_id:
merged["vocabulary_id"] = resolved_vocabulary_id
return _clean_extra_config(merged)
def _extract_legacy_audio_columns(audio_scene: str, extra_config: dict[str, Any]) -> dict[str, Any]:
extra_config = _parse_json_object(extra_config)
columns = {
"asr_model_name": None,
"asr_vocabulary_id": None,
"asr_speaker_count": None,
"asr_language_hints": None,
"asr_disfluency_removal_enabled": None,
"asr_diarization_enabled": None,
"vp_template_text": None,
"vp_duration_seconds": None,
"vp_sample_rate": None,
"vp_channels": None,
"vp_max_size_bytes": None,
}
if audio_scene == "asr":
language_hints = extra_config.get("language_hints")
if isinstance(language_hints, list):
language_hints = ",".join(str(item).strip() for item in language_hints if str(item).strip())
columns.update(
{
"asr_model_name": extra_config.get("model"),
"asr_vocabulary_id": extra_config.get("vocabulary_id"),
"asr_speaker_count": extra_config.get("speaker_count"),
"asr_language_hints": language_hints,
"asr_disfluency_removal_enabled": 1 if extra_config.get("disfluency_removal_enabled") is True else 0 if extra_config.get("disfluency_removal_enabled") is False else None,
"asr_diarization_enabled": 1 if extra_config.get("diarization_enabled") is True else 0 if extra_config.get("diarization_enabled") is False else None,
}
)
else:
columns.update(
{
"vp_template_text": extra_config.get("template_text"),
"vp_duration_seconds": extra_config.get("duration_seconds"),
"vp_sample_rate": extra_config.get("sample_rate"),
"vp_channels": extra_config.get("channels"),
"vp_max_size_bytes": extra_config.get("max_size_bytes"),
}
)
return columns
def _normalize_audio_row(row: dict[str, Any]) -> dict[str, Any]:
extra_config = _parse_json_object(row.get("extra_config"))
if row.get("audio_scene") == "asr":
if extra_config.get("model") is None and row.get("asr_model_name") is not None:
extra_config["model"] = row["asr_model_name"]
if extra_config.get("vocabulary_id") is None and row.get("asr_vocabulary_id") is not None:
extra_config["vocabulary_id"] = row["asr_vocabulary_id"]
if extra_config.get("speaker_count") is None and row.get("asr_speaker_count") is not None:
extra_config["speaker_count"] = row["asr_speaker_count"]
if extra_config.get("language_hints") is None and row.get("asr_language_hints"):
extra_config["language_hints"] = _normalize_string_list(row["asr_language_hints"])
if extra_config.get("disfluency_removal_enabled") is None and row.get("asr_disfluency_removal_enabled") is not None:
extra_config["disfluency_removal_enabled"] = bool(row["asr_disfluency_removal_enabled"])
if extra_config.get("diarization_enabled") is None and row.get("asr_diarization_enabled") is not None:
extra_config["diarization_enabled"] = bool(row["asr_diarization_enabled"])
else:
if extra_config.get("model") is None and row.get("model_name"):
extra_config["model"] = row["model_name"]
if extra_config.get("template_text") is None and row.get("vp_template_text") is not None:
extra_config["template_text"] = row["vp_template_text"]
if extra_config.get("duration_seconds") is None and row.get("vp_duration_seconds") is not None:
extra_config["duration_seconds"] = row["vp_duration_seconds"]
if extra_config.get("sample_rate") is None and row.get("vp_sample_rate") is not None:
extra_config["sample_rate"] = row["vp_sample_rate"]
if extra_config.get("channels") is None and row.get("vp_channels") is not None:
extra_config["channels"] = row["vp_channels"]
if extra_config.get("max_size_bytes") is None and row.get("vp_max_size_bytes") is not None:
extra_config["max_size_bytes"] = row["vp_max_size_bytes"]
row["extra_config"] = extra_config
row["service_model_name"] = extra_config.get("model")
row["request_timeout_seconds"] = int(row.get("request_timeout_seconds") or 300)
return row
def _resolve_hot_word_vocabulary_id(cursor, request) -> str | None:
vocabulary_id = request.asr_vocabulary_id
vocabulary_id = _parse_json_object(request.extra_config).get("vocabulary_id")
if request.hot_word_group_id:
cursor.execute("SELECT vocabulary_id FROM hot_word_group WHERE id = %s", (request.hot_word_group_id,))
group_row = cursor.fetchone()
@ -482,10 +396,8 @@ def list_audio_model_configs(scene: str = "all"):
cursor = conn.cursor(dictionary=True)
sql = """
SELECT a.config_id, a.model_code, a.model_name, a.audio_scene, a.provider, a.endpoint_url, a.api_key,
a.asr_model_name, a.asr_vocabulary_id, a.hot_word_group_id, a.asr_speaker_count, a.asr_language_hints,
a.asr_disfluency_removal_enabled, a.asr_diarization_enabled,
a.vp_template_text, a.vp_duration_seconds, a.vp_sample_rate, a.vp_channels, a.vp_max_size_bytes,
a.extra_config, a.description, a.is_active, a.is_default, a.created_at, a.updated_at,
a.request_timeout_seconds, a.hot_word_group_id, a.extra_config,
a.description, a.is_active, a.is_default, a.created_at, a.updated_at,
g.name AS hot_word_group_name, g.vocabulary_id AS hot_word_group_vocab_id
FROM audio_model_config a
LEFT JOIN hot_word_group g ON g.id = a.hot_word_group_id
@ -524,17 +436,13 @@ def create_audio_model_config(request):
asr_vocabulary_id = _resolve_hot_word_vocabulary_id(cursor, request)
extra_config = _merge_audio_extra_config(request, vocabulary_id=asr_vocabulary_id)
legacy_columns = _extract_legacy_audio_columns(request.audio_scene, extra_config)
cursor.execute(
"""
INSERT INTO audio_model_config
(model_code, model_name, audio_scene, provider, endpoint_url, api_key,
asr_model_name, asr_vocabulary_id, hot_word_group_id, asr_speaker_count, asr_language_hints,
asr_disfluency_removal_enabled, asr_diarization_enabled,
vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes,
extra_config, description, is_active, is_default)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
request_timeout_seconds, hot_word_group_id, extra_config, description, is_active, is_default)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
""",
(
request.model_code,
@ -543,18 +451,8 @@ def create_audio_model_config(request):
request.provider,
request.endpoint_url,
request.api_key,
legacy_columns["asr_model_name"],
legacy_columns["asr_vocabulary_id"],
request.request_timeout_seconds,
request.hot_word_group_id,
legacy_columns["asr_speaker_count"],
legacy_columns["asr_language_hints"],
legacy_columns["asr_disfluency_removal_enabled"],
legacy_columns["asr_diarization_enabled"],
legacy_columns["vp_template_text"],
legacy_columns["vp_duration_seconds"],
legacy_columns["vp_sample_rate"],
legacy_columns["vp_channels"],
legacy_columns["vp_max_size_bytes"],
json.dumps(extra_config, ensure_ascii=False),
request.description,
1 if request.is_active else 0,
@ -594,16 +492,13 @@ def update_audio_model_config(model_code: str, request):
asr_vocabulary_id = _resolve_hot_word_vocabulary_id(cursor, request)
extra_config = _merge_audio_extra_config(request, vocabulary_id=asr_vocabulary_id)
legacy_columns = _extract_legacy_audio_columns(request.audio_scene, extra_config)
cursor.execute(
"""
UPDATE audio_model_config
SET model_code = %s, model_name = %s, audio_scene = %s, provider = %s, endpoint_url = %s, api_key = %s,
asr_model_name = %s, asr_vocabulary_id = %s, hot_word_group_id = %s, asr_speaker_count = %s, asr_language_hints = %s,
asr_disfluency_removal_enabled = %s, asr_diarization_enabled = %s,
vp_template_text = %s, vp_duration_seconds = %s, vp_sample_rate = %s, vp_channels = %s, vp_max_size_bytes = %s,
extra_config = %s, description = %s, is_active = %s, is_default = %s
request_timeout_seconds = %s, hot_word_group_id = %s, extra_config = %s,
description = %s, is_active = %s, is_default = %s
WHERE model_code = %s
""",
(
@ -613,18 +508,8 @@ def update_audio_model_config(model_code: str, request):
request.provider,
request.endpoint_url,
request.api_key,
legacy_columns["asr_model_name"],
legacy_columns["asr_vocabulary_id"],
request.request_timeout_seconds,
request.hot_word_group_id,
legacy_columns["asr_speaker_count"],
legacy_columns["asr_language_hints"],
legacy_columns["asr_disfluency_removal_enabled"],
legacy_columns["asr_diarization_enabled"],
legacy_columns["vp_template_text"],
legacy_columns["vp_duration_seconds"],
legacy_columns["vp_sample_rate"],
legacy_columns["vp_channels"],
legacy_columns["vp_max_size_bytes"],
json.dumps(extra_config, ensure_ascii=False),
request.description,
1 if request.is_active else 0,
@ -693,6 +578,7 @@ def test_audio_model_config(request):
"api_key": request.api_key,
"audio_scene": request.audio_scene,
"hot_word_group_id": request.hot_word_group_id,
"request_timeout_seconds": request.request_timeout_seconds,
**extra_config,
}
result = transcription_service.test_asr_model(runtime_config, test_file_url=request.test_file_url)

View File

@ -14,6 +14,19 @@ from app.core.database import get_db_connection
from app.services.system_config_service import SystemConfigService
class _DefaultTimeoutSession(requests.Session):
    """A requests.Session that falls back to a default timeout.

    Any request issued without an explicit ``timeout`` keyword gets
    ``default_timeout`` applied, so downstream calls cannot accidentally
    hang forever on a slow provider.
    """

    def __init__(self, default_timeout: Optional[int] = None):
        super().__init__()
        # Seconds applied when the caller omits ``timeout``; falsy values
        # (None/0) disable the injection and keep requests' default behavior.
        self.default_timeout = default_timeout

    def request(self, method, url, **kwargs):
        caller_set_timeout = "timeout" in kwargs
        if self.default_timeout and not caller_set_timeout:
            kwargs["timeout"] = self.default_timeout
        return super().request(method, url, **kwargs)
class AsyncTranscriptionService:
"""异步转录服务类"""
@ -23,8 +36,8 @@ class AsyncTranscriptionService:
self.base_url = APP_CONFIG['base_url']
@staticmethod
def _create_requests_session() -> requests.Session:
session = requests.Session()
def _create_requests_session(default_timeout: Optional[int] = None) -> requests.Session:
session = _DefaultTimeoutSession(default_timeout=default_timeout)
session.trust_env = os.getenv("IMEETING_USE_SYSTEM_PROXY", "").lower() in {"1", "true", "yes", "on"}
return session
@ -57,6 +70,35 @@ class AsyncTranscriptionService:
request_options["base_address"] = base_address
return request_options
@staticmethod
def _resolve_request_timeout_seconds(audio_config: Optional[Dict[str, Any]] = None) -> int:
    """Read ``request_timeout_seconds`` from the audio model config.

    Falls back to 300 seconds when the value is missing or not coercible
    to int, and never returns less than 10 seconds.
    """
    config = audio_config or {}
    raw_value = config.get("request_timeout_seconds")
    try:
        resolved = int(raw_value)
    except (TypeError, ValueError):
        # Missing key (None) or garbage value: use the 5-minute default.
        resolved = 300
    return resolved if resolved >= 10 else 10
def _dashscope_async_call(self, request_options: Dict[str, Any], call_params: Dict[str, Any], timeout_seconds: int):
    """Submit an async DashScope transcription call through a session whose
    default request timeout is ``timeout_seconds``.

    NOTE(review): the TypeError fallback assumes older dashscope SDK
    versions reject the ``session`` keyword — confirm against the pinned
    SDK version; a TypeError from a kwarg collision between
    ``request_options`` and ``call_params`` would be masked the same way.
    """
    session = self._create_requests_session(timeout_seconds)
    try:
        try:
            return Transcription.async_call(session=session, **request_options, **call_params)
        except TypeError:
            # SDK does not accept an injected session: retry without it.
            return Transcription.async_call(**request_options, **call_params)
    finally:
        # Always release the session's pooled connections, even on failure.
        session.close()
def _dashscope_fetch(self, paraformer_task_id: str, request_options: Dict[str, Any], timeout_seconds: int):
    """Fetch the status/result of a DashScope transcription task through a
    session whose default request timeout is ``timeout_seconds``.

    NOTE(review): mirrors ``_dashscope_async_call`` — the TypeError
    fallback is for SDK versions that reject the ``session`` keyword.
    """
    session = self._create_requests_session(timeout_seconds)
    try:
        try:
            return Transcription.fetch(task=paraformer_task_id, session=session, **request_options)
        except TypeError:
            # SDK does not accept an injected session: retry without it.
            return Transcription.fetch(task=paraformer_task_id, **request_options)
    finally:
        # Always release the session's pooled connections, even on failure.
        session.close()
@staticmethod
def _build_dashscope_call_params(audio_config: Dict[str, Any], file_url: str) -> Dict[str, Any]:
model_name = audio_config.get("model") or "paraformer-v2"
@ -93,13 +135,14 @@ class AsyncTranscriptionService:
raise Exception(f"当前仅支持 DashScope 音频识别测试,暂不支持供应商: {provider}")
request_options = self._build_dashscope_request_options(audio_config)
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
dashscope.api_key = request_options["api_key"]
target_file_url = (
test_file_url
or "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav"
)
call_params = self._build_dashscope_call_params(audio_config, target_file_url)
response = Transcription.async_call(**request_options, **call_params)
response = self._dashscope_async_call(request_options, call_params, timeout_seconds)
if response.status_code != HTTPStatus.OK:
raise Exception(response.message or "音频模型测试失败")
@ -154,6 +197,7 @@ class AsyncTranscriptionService:
raise Exception(f"当前仅支持 DashScope 音频识别,暂不支持供应商: {provider}")
request_options = self._build_dashscope_request_options(audio_config)
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
dashscope.api_key = request_options["api_key"]
call_params = self._build_dashscope_call_params(audio_config, file_url)
@ -164,7 +208,7 @@ class AsyncTranscriptionService:
)
# 3. 调用Paraformer异步API
task_response = Transcription.async_call(**request_options, **call_params)
task_response = self._dashscope_async_call(request_options, call_params, timeout_seconds)
if task_response.status_code != HTTPStatus.OK:
print(f"Failed to start transcription: {task_response.status_code}, {task_response.message}")
@ -238,11 +282,11 @@ class AsyncTranscriptionService:
# 2. 查询外部API获取状态
try:
request_options = self._build_dashscope_request_options(
SystemConfigService.get_active_audio_model_config("asr")
)
audio_config = SystemConfigService.get_active_audio_model_config("asr")
request_options = self._build_dashscope_request_options(audio_config)
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
dashscope.api_key = request_options["api_key"]
paraformer_response = Transcription.fetch(task=paraformer_task_id, **request_options)
paraformer_response = self._dashscope_fetch(paraformer_task_id, request_options, timeout_seconds)
if paraformer_response.status_code != HTTPStatus.OK:
raise Exception(f"Failed to fetch task status from provider: {paraformer_response.message}")
@ -560,9 +604,11 @@ class AsyncTranscriptionService:
transcription_url = paraformer_output['results'][0]['transcription_url']
print(f"Fetching transcription from URL: {transcription_url}")
session = self._create_requests_session()
audio_config = SystemConfigService.get_active_audio_model_config("asr")
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
session = self._create_requests_session(timeout_seconds)
try:
response = session.get(transcription_url)
response = session.get(transcription_url, timeout=timeout_seconds)
finally:
session.close()
response.raise_for_status()

View File

@ -0,0 +1,188 @@
"""
音频预处理服务
使用 ffprobe/ffmpeg 对上传音频做统一探测和规范化降低长会议音频的格式兼容风险
当前阶段只做单文件预处理不做拆片
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import json
import shutil
import subprocess
from app.utils.audio_parser import get_audio_duration
@dataclass
class AudioMetadata:
    """Audio metadata probed from a file.

    ``duration_seconds`` is always populated (0 when unknown); the other
    fields are only filled in when ffprobe supplied them.
    """

    duration_seconds: int = 0  # whole seconds; 0 when it could not be determined
    sample_rate: Optional[int] = None  # sample rate as reported by ffprobe
    channels: Optional[int] = None  # channel count as reported by ffprobe
    codec_name: Optional[str] = None  # audio codec name, e.g. "aac"
    format_name: Optional[str] = None  # container format name from ffprobe
    bit_rate: Optional[int] = None  # stream (or container) bit rate
@dataclass
class AudioPreprocessResult:
    """Result of one preprocessing pass over an uploaded audio file."""

    file_path: Path  # file callers should use downstream (normalized or original)
    file_name: str  # basename of file_path
    file_size: int  # size of file_path in bytes
    metadata: AudioMetadata  # probed metadata of the returned file
    applied: bool = False  # True when ffmpeg actually transcoded the file
    output_format: Optional[str] = None  # extension without the dot, e.g. "m4a"
class AudioPreprocessService:
    """ffmpeg-based audio preprocessing service.

    Probes uploaded audio with ffprobe and normalizes it to a single
    canonical format to reduce format-compatibility risk for long meeting
    recordings. Degrades gracefully: without ffmpeg/ffprobe on PATH the
    original file is kept and only the duration is probed.
    """

    # Normalization targets: mono 16 kHz AAC in an .m4a container at 64 kbps.
    TARGET_EXTENSION = ".m4a"
    TARGET_SAMPLE_RATE = 16000
    TARGET_CHANNELS = 1
    TARGET_BITRATE = "64k"

    def __init__(self):
        # Resolve tool locations from PATH once at construction;
        # None when the corresponding tool is not installed.
        self.ffmpeg_path = shutil.which("ffmpeg")
        self.ffprobe_path = shutil.which("ffprobe")

    def probe_audio(self, file_path: str | Path) -> AudioMetadata:
        """Probe audio metadata.

        Tries ffprobe first; when ffprobe is missing or fails, falls back
        to the duration-only parser (all other fields stay None).

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"音频文件不存在: {path}")
        if self.ffprobe_path:
            metadata = self._probe_with_ffprobe(path)
            if metadata:
                return metadata
        # Fallback: only the duration can be recovered without ffprobe.
        return AudioMetadata(duration_seconds=get_audio_duration(str(path)))

    def preprocess(self, file_path: str | Path) -> AudioPreprocessResult:
        """Normalize an audio file to the canonical upload format.

        Current policy (single file, no chunking):
          1. drop any video stream (``-vn``)
          2. downmix to mono
          3. resample to 16 kHz
          4. re-encode as AAC in an .m4a container

        When ffmpeg is unavailable the source file is returned untouched
        with ``applied=False``.

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
            RuntimeError: if ffmpeg exits with a non-zero status.
        """
        source_path = Path(file_path)
        if not source_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {source_path}")
        if not self.ffmpeg_path:
            # Best effort without ffmpeg: probe only, keep the original file.
            metadata = self.probe_audio(source_path)
            return AudioPreprocessResult(
                file_path=source_path,
                file_name=source_path.name,
                file_size=source_path.stat().st_size,
                metadata=metadata,
                applied=False,
                output_format=source_path.suffix.lower().lstrip(".") or None,
            )

        output_path = source_path.with_name(f"{source_path.stem}_normalized{self.TARGET_EXTENSION}")
        # Encode into a temp name first so a failed run never leaves a
        # partial file at the final path; promoted via Path.replace below.
        temp_output_path = output_path.with_name(f"{output_path.stem}.tmp{output_path.suffix}")
        command = [
            self.ffmpeg_path,
            "-y",
            "-i",
            str(source_path),
            "-vn",
            "-ac",
            str(self.TARGET_CHANNELS),
            "-ar",
            str(self.TARGET_SAMPLE_RATE),
            "-c:a",
            "aac",
            "-b:a",
            self.TARGET_BITRATE,
            "-movflags",
            "+faststart",
            str(temp_output_path),
        ]
        try:
            completed = subprocess.run(
                command,
                check=False,
                capture_output=True,
                text=True,
            )
            if completed.returncode != 0:
                stderr = (completed.stderr or "").strip()
                raise RuntimeError(stderr or "ffmpeg 预处理失败")
            temp_output_path.replace(output_path)
            metadata = self.probe_audio(output_path)
            return AudioPreprocessResult(
                file_path=output_path,
                file_name=output_path.name,
                file_size=output_path.stat().st_size,
                metadata=metadata,
                applied=True,
                output_format=output_path.suffix.lower().lstrip("."),
            )
        finally:
            # After a successful replace() the temp file is gone; this only
            # cleans up leftovers from a failed run.
            if temp_output_path.exists():
                temp_output_path.unlink()

    def _probe_with_ffprobe(self, file_path: Path) -> Optional[AudioMetadata]:
        """Run ffprobe and parse its JSON output; returns None on any failure
        so the caller can fall back to the duration-only parser."""
        command = [
            self.ffprobe_path,
            "-v",
            "error",
            "-print_format",
            "json",
            "-show_streams",
            "-show_format",
            str(file_path),
        ]
        try:
            completed = subprocess.run(
                command,
                check=False,
                capture_output=True,
                text=True,
            )
            if completed.returncode != 0 or not completed.stdout:
                return None
            payload = json.loads(completed.stdout)
            streams = payload.get("streams") or []
            # First audio stream, or an empty dict when there is none.
            audio_stream = next((stream for stream in streams if stream.get("codec_type") == "audio"), {})
            format_info = payload.get("format") or {}
            # Prefer per-stream values; fall back to container-level ones.
            duration_value = audio_stream.get("duration") or format_info.get("duration")
            duration_seconds = int(float(duration_value)) if duration_value else 0
            sample_rate_value = audio_stream.get("sample_rate")
            channels_value = audio_stream.get("channels")
            bit_rate_value = audio_stream.get("bit_rate") or format_info.get("bit_rate")
            return AudioMetadata(
                duration_seconds=duration_seconds,
                sample_rate=int(sample_rate_value) if sample_rate_value else None,
                channels=int(channels_value) if channels_value else None,
                codec_name=audio_stream.get("codec_name"),
                format_name=format_info.get("format_name"),
                bit_rate=int(bit_rate_value) if bit_rate_value else None,
            )
        except Exception:
            # Deliberate best-effort: any parse/run error means "no metadata".
            return None
# Module-level singleton imported by the audio upload endpoints.
audio_preprocess_service = AudioPreprocessService()

View File

@ -8,8 +8,8 @@ from app.services.llm_service import LLMService
from app.services.async_transcription_service import AsyncTranscriptionService
from app.services.async_meeting_service import async_meeting_service
from app.services.audio_service import handle_audio_upload
from app.services.audio_preprocess_service import audio_preprocess_service
from app.services.system_config_service import SystemConfigService
from app.utils.audio_parser import get_audio_duration
from app.core.auth import get_current_user, get_optional_current_user
from app.core.response import create_api_response
from typing import Any, Dict, List, Optional
@ -479,7 +479,7 @@ def get_meeting_details(meeting_id: int, current_user: dict = Depends(get_curren
cursor = connection.cursor(dictionary=True)
query = '''
SELECT m.meeting_id, m.title, m.meeting_time, m.summary, m.created_at, m.tags,
m.user_id as creator_id, u.caption as creator_username, m.prompt_id,
m.user_id as creator_id, u.caption as creator_username, u.username as creator_account, m.prompt_id,
af.file_path as audio_file_path, af.duration as audio_duration,
p.name as prompt_name, m.access_password
FROM meetings m
@ -505,7 +505,8 @@ def get_meeting_details(meeting_id: int, current_user: dict = Depends(get_curren
meeting_id=meeting['meeting_id'], title=meeting['title'], meeting_time=meeting['meeting_time'],
summary=meeting['summary'], created_at=meeting['created_at'], attendees=attendees,
attendee_ids=[row['user_id'] for row in attendees_data],
creator_id=meeting['creator_id'], creator_username=meeting['creator_username'], tags=tags,
creator_id=meeting['creator_id'], creator_username=meeting['creator_username'],
creator_account=meeting.get('creator_account'), tags=tags,
prompt_id=meeting.get('prompt_id'),
prompt_name=meeting.get('prompt_name'),
overall_status=overall_status.get('overall_status'),
@ -725,7 +726,6 @@ async def upload_audio(
meeting_dir.mkdir(exist_ok=True)
unique_filename = f"{uuid.uuid4()}{file_extension}"
absolute_path = meeting_dir / unique_filename
relative_path = absolute_path.relative_to(BASE_DIR)
try:
with open(absolute_path, "wb") as buffer:
@ -733,17 +733,28 @@ async def upload_audio(
except Exception as e:
return create_api_response(code="500", message=f"保存文件失败: {str(e)}")
# 3.5 获取音频时长
audio_duration = 0
# 3.5 统一做音频预处理
try:
audio_duration = get_audio_duration(str(absolute_path))
print(f"音频时长: {audio_duration}")
preprocess_result = audio_preprocess_service.preprocess(absolute_path)
processed_absolute_path = preprocess_result.file_path
audio_duration = preprocess_result.metadata.duration_seconds
print(
f"音频预处理完成: source={absolute_path.name}, "
f"target={processed_absolute_path.name}, duration={audio_duration}s, "
f"applied={preprocess_result.applied}"
)
except Exception as e:
print(f"警告: 获取音频时长失败,但不影响后续流程: {e}")
if absolute_path.exists():
try:
os.remove(absolute_path)
except OSError:
pass
return create_api_response(code="500", message=f"音频预处理失败: {str(e)}")
file_path = '/' + str(relative_path)
file_name = audio_file.filename
file_size = audio_file.size
processed_relative_path = processed_absolute_path.relative_to(BASE_DIR)
file_path = '/' + str(processed_relative_path)
file_name = preprocess_result.file_name
file_size = preprocess_result.file_size
# 4. 调用 audio_service 处理文件(权限检查、数据库更新、启动转录)
result = handle_audio_upload(
@ -761,14 +772,26 @@ async def upload_audio(
# 如果不成功,删除已保存的文件并返回错误
if not result["success"]:
if absolute_path.exists():
try:
os.remove(absolute_path)
print(f"Deleted file due to processing error: {absolute_path}")
except Exception as e:
print(f"Warning: Failed to delete file {absolute_path}: {e}")
cleanup_paths = [processed_absolute_path]
if processed_absolute_path != absolute_path:
cleanup_paths.append(absolute_path)
for cleanup_path in cleanup_paths:
if cleanup_path.exists():
try:
os.remove(cleanup_path)
print(f"Deleted file due to processing error: {cleanup_path}")
except Exception as e:
print(f"Warning: Failed to delete file {cleanup_path}: {e}")
return result["response"]
if preprocess_result.applied and processed_absolute_path != absolute_path and absolute_path.exists():
try:
os.remove(absolute_path)
print(f"Deleted original uploaded audio after preprocessing: {absolute_path}")
except Exception as e:
print(f"Warning: Failed to delete original uploaded audio {absolute_path}: {e}")
# 5. 返回成功响应
transcription_task_id = result["transcription_task_id"]
message_suffix = ""

View File

@ -120,33 +120,8 @@ class SystemConfigService:
cfg["audio_scene"] = audio_row["audio_scene"]
if audio_row.get("hot_word_group_id") is not None:
cfg["hot_word_group_id"] = audio_row["hot_word_group_id"]
if audio_row.get("audio_scene") == "asr":
if extra_config.get("model") is None and audio_row.get("asr_model_name") is not None:
extra_config["model"] = audio_row["asr_model_name"]
if extra_config.get("vocabulary_id") is None and audio_row.get("asr_vocabulary_id") is not None:
extra_config["vocabulary_id"] = audio_row["asr_vocabulary_id"]
if extra_config.get("speaker_count") is None and audio_row.get("asr_speaker_count") is not None:
extra_config["speaker_count"] = audio_row["asr_speaker_count"]
if extra_config.get("language_hints") is None and audio_row.get("asr_language_hints"):
extra_config["language_hints"] = audio_row["asr_language_hints"]
if extra_config.get("disfluency_removal_enabled") is None and audio_row.get("asr_disfluency_removal_enabled") is not None:
extra_config["disfluency_removal_enabled"] = bool(audio_row["asr_disfluency_removal_enabled"])
if extra_config.get("diarization_enabled") is None and audio_row.get("asr_diarization_enabled") is not None:
extra_config["diarization_enabled"] = bool(audio_row["asr_diarization_enabled"])
else:
if extra_config.get("model") is None and audio_row.get("model_name"):
extra_config["model"] = audio_row["model_name"]
if extra_config.get("template_text") is None and audio_row.get("vp_template_text") is not None:
extra_config["template_text"] = audio_row["vp_template_text"]
if extra_config.get("duration_seconds") is None and audio_row.get("vp_duration_seconds") is not None:
extra_config["duration_seconds"] = audio_row["vp_duration_seconds"]
if extra_config.get("sample_rate") is None and audio_row.get("vp_sample_rate") is not None:
extra_config["sample_rate"] = audio_row["vp_sample_rate"]
if extra_config.get("channels") is None and audio_row.get("vp_channels") is not None:
extra_config["channels"] = audio_row["vp_channels"]
if extra_config.get("max_size_bytes") is None and audio_row.get("vp_max_size_bytes") is not None:
extra_config["max_size_bytes"] = audio_row["vp_max_size_bytes"]
if audio_row.get("request_timeout_seconds") is not None:
cfg["request_timeout_seconds"] = int(audio_row["request_timeout_seconds"])
language_hints = cls._normalize_string_list(extra_config.get("language_hints"))
if language_hints is not None:
@ -162,11 +137,8 @@ class SystemConfigService:
cursor = conn.cursor(dictionary=True)
cursor.execute(
"""
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, hot_word_group_id,
asr_model_name, asr_vocabulary_id, asr_speaker_count, asr_language_hints,
asr_disfluency_removal_enabled, asr_diarization_enabled,
vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes,
extra_config
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key,
request_timeout_seconds, hot_word_group_id, extra_config
FROM audio_model_config
WHERE audio_scene = %s AND is_active = 1
ORDER BY is_default DESC, updated_at DESC, config_id ASC
@ -260,11 +232,8 @@ class SystemConfigService:
cursor.execute(
"""
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, hot_word_group_id,
asr_model_name, asr_vocabulary_id, asr_speaker_count, asr_language_hints,
asr_disfluency_removal_enabled, asr_diarization_enabled,
vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes,
extra_config
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key,
request_timeout_seconds, hot_word_group_id, extra_config
FROM audio_model_config
WHERE model_code = %s AND is_active = 1
ORDER BY is_default DESC, config_id ASC
@ -427,28 +396,30 @@ class SystemConfigService:
cursor.execute(
"""
INSERT INTO audio_model_config
(model_code, model_name, audio_scene, provider, asr_model_name, asr_vocabulary_id, asr_speaker_count,
asr_language_hints, asr_disfluency_removal_enabled, asr_diarization_enabled, description, is_active, is_default)
(model_code, model_name, audio_scene, provider, request_timeout_seconds, extra_config, description, is_active, is_default)
VALUES (
'audio_model',
'音频识别模型',
'asr',
'dashscope',
'paraformer-v2',
%s,
10,
'zh,en',
1,
1,
300,
JSON_OBJECT(
'model', 'paraformer-v2',
'vocabulary_id', %s,
'speaker_count', 10,
'language_hints', JSON_ARRAY('zh', 'en'),
'disfluency_removal_enabled', TRUE,
'diarization_enabled', TRUE
),
'语音识别模型配置',
1,
1
)
ON DUPLICATE KEY UPDATE
asr_vocabulary_id = VALUES(asr_vocabulary_id),
extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s),
is_active = 1
""",
(str(value),),
(str(value), str(value)),
)
conn.commit()
cursor.close()
@ -626,7 +597,6 @@ class SystemConfigService:
audio_cfg = cls.get_active_audio_model_config("asr")
if audio_cfg.get("vocabulary_id"):
return audio_cfg["vocabulary_id"]
# 回退:直接读 audio_model_config.asr_vocabulary_id
audio_vocab = cls.get_config_attribute('audio_model', 'vocabulary_id')
if audio_vocab:
return audio_vocab

View File

@ -3,15 +3,16 @@
用于解析音频文件的元数据信息如时长采样率编码格式等
"""
from tinytag import TinyTag
import json
import shutil
import subprocess
def get_audio_duration(file_path: str) -> int:
    """
    Get the duration of an audio file, in whole seconds.

    Probes the file with the system ``ffprobe`` binary and parses its JSON
    ``format`` section. Supported formats are whatever the installed ffprobe
    supports (MP3, WAV, OGG, FLAC, ...).

    Args:
        file_path: Full path to the audio file.

    Returns:
        Duration in seconds as an int; 0 when ffprobe is missing, the file
        cannot be probed, or its output lacks a duration.
    """
    ffprobe_path = shutil.which("ffprobe")
    if not ffprobe_path:
        # ffprobe is not installed; callers treat 0 as "no duration info".
        return 0
    try:
        completed = subprocess.run(
            [
                ffprobe_path,
                "-v",
                "error",
                "-print_format",
                "json",
                "-show_format",
                str(file_path),
            ],
            check=False,  # non-zero exit is handled below, not raised
            capture_output=True,
            text=True,
        )
        if completed.returncode == 0 and completed.stdout:
            payload = json.loads(completed.stdout)
            # "format.duration" is a decimal string, e.g. "123.456".
            duration_value = (payload.get("format") or {}).get("duration")
            if duration_value:
                return int(float(duration_value))
    except Exception as e:
        # Best-effort helper: log and fall through to the 0 sentinel.
        print(f"ffprobe 获取音频时长失败 ({file_path}): {e}")
    return 0

View File

@ -22,6 +22,4 @@ psutil
# APK Parsing
pyaxmlparser
# Audio Metadata
tinytag
python-dotenv

View File

@ -0,0 +1,105 @@
-- Migration: fold legacy ASR/voiceprint columns of audio_model_config into the
-- extra_config JSON column, add request_timeout_seconds, and drop obsolete
-- AI-model tables. Uses information_schema checks + prepared statements so the
-- script is idempotent on MySQL 5.7 (no ADD COLUMN IF NOT EXISTS there).

-- Step 1: detect whether request_timeout_seconds already exists.
SET @request_timeout_exists := (
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME = 'request_timeout_seconds'
);
-- Add the column only when missing; otherwise run a no-op SELECT.
SET @sql := IF(
@request_timeout_exists = 0,
'ALTER TABLE `audio_model_config` ADD COLUMN `request_timeout_seconds` int(11) NOT NULL DEFAULT 300 COMMENT ''音频转录请求超时(秒)'' AFTER `api_key`',
'SELECT 1'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 2: if the legacy ASR columns still exist, copy their values into
-- extra_config before they are dropped below.
SET @has_asr_legacy := (
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME = 'asr_model_name'
);
SET @sql := IF(
@has_asr_legacy > 0,
'UPDATE `audio_model_config`
SET `extra_config` = JSON_SET(
COALESCE(`extra_config`, JSON_OBJECT()),
''$.model'', `asr_model_name`,
''$.vocabulary_id'', `asr_vocabulary_id`,
''$.speaker_count'', `asr_speaker_count`,
''$.language_hints'', `asr_language_hints`,
''$.disfluency_removal_enabled'', `asr_disfluency_removal_enabled`,
''$.diarization_enabled'', `asr_diarization_enabled`
)
WHERE `audio_scene` = ''asr''',
'SELECT 1'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 3: same migration for the legacy voiceprint (vp_*) columns.
SET @has_voiceprint_legacy := (
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME = 'vp_template_text'
);
SET @sql := IF(
@has_voiceprint_legacy > 0,
'UPDATE `audio_model_config`
SET `extra_config` = JSON_SET(
COALESCE(`extra_config`, JSON_OBJECT()),
''$.template_text'', `vp_template_text`,
''$.duration_seconds'', `vp_duration_seconds`,
''$.sample_rate'', `vp_sample_rate`,
''$.channels'', `vp_channels`,
''$.max_size_bytes'', `vp_max_size_bytes`
)
WHERE `audio_scene` = ''voiceprint''',
'SELECT 1'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 4: backfill request_timeout_seconds — prefer a non-zero column value,
-- then any timeout stored in extra_config, else the 300s default.
UPDATE `audio_model_config`
SET `request_timeout_seconds` = COALESCE(
NULLIF(`request_timeout_seconds`, 0),
CAST(JSON_UNQUOTE(JSON_EXTRACT(`extra_config`, '$.request_timeout_seconds')) AS UNSIGNED),
300
);
-- Step 5: build one dynamic ALTER that drops every remaining legacy column.
-- Raise group_concat_max_len so the generated DDL is not truncated.
SET SESSION group_concat_max_len = 8192;
SELECT GROUP_CONCAT(CONCAT('DROP COLUMN `', COLUMN_NAME, '`') ORDER BY ORDINAL_POSITION SEPARATOR ', ')
INTO @drop_columns_sql
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME IN (
'asr_model_name',
'asr_vocabulary_id',
'asr_speaker_count',
'asr_language_hints',
'asr_disfluency_removal_enabled',
'asr_diarization_enabled',
'vp_template_text',
'vp_duration_seconds',
'vp_sample_rate',
'vp_channels',
'vp_max_size_bytes'
);
-- No-op when the legacy columns are already gone (re-run safety).
SET @sql := IF(
@drop_columns_sql IS NULL OR @drop_columns_sql = '',
'SELECT 1',
CONCAT('ALTER TABLE `audio_model_config` ', @drop_columns_sql)
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 6: remove superseded AI-model tables.
DROP TABLE IF EXISTS `ai_model_configs`;
DROP TABLE IF EXISTS `ai_model_config`;

View File

@ -3,3 +3,8 @@
# 组件
+ 数据库 mysql 5.7+ 10.100.51.51:3306 root | Unis@123
+ 缓存 redis 6.2 10.100.51.51:6379 Unis@123
# 升级前确认
+ 后端运行环境需提供 `ffmpeg``ffprobe`
+ 本次数据库升级包含 `backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql`
+ 升级后 `audio_model_config` 将新增 `request_timeout_seconds`,并清理旧的 ASR/声纹冗余列

View File

@ -172,7 +172,7 @@ export default function useAdminDashboardPage() {
const handleDownloadTranscript = async (meetingId) => {
try {
const response = await apiClient.get(buildApiUrl(`/api/meetings/${meetingId}/transcript`));
const response = await apiClient.get(buildApiUrl(API_ENDPOINTS.MEETINGS.TRANSCRIPT(meetingId)));
if (response.code === '200') {
const dataStr = JSON.stringify(response.data, null, 2);
const blob = new Blob([dataStr], { type: 'application/json' });
@ -190,6 +190,32 @@ export default function useAdminDashboardPage() {
}
};
// Download a meeting's audio via the streaming endpoint and trigger a
// browser "save as" by clicking a temporary object-URL anchor.
// audioFilePath is only used to derive the download file name.
const handleDownloadAudio = async (meetingId, audioFilePath) => {
try {
// credentials: 'include' sends the session cookie to the backend.
const response = await fetch(buildApiUrl(`${API_ENDPOINTS.MEETINGS.AUDIO(meetingId)}/stream`), {
credentials: 'include',
});
if (!response.ok) {
throw new Error(`audio download failed: ${response.status}`);
}
// Buffer the full stream into a Blob, then expose it as an object URL.
const blob = await response.blob();
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
// Use the last path segment as the file name when available.
const fileNameFromPath = audioFilePath?.split('/').pop();
// Fallback name gets ".mp3" only when no extension can be inferred.
const fallbackExtension = fileNameFromPath?.includes('.') ? '' : '.mp3';
link.href = url;
link.download = fileNameFromPath || `meeting_audio_${meetingId}${fallbackExtension}`;
// Programmatic click requires the anchor to be in the DOM on some browsers.
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
// Release the Blob memory held by the object URL.
URL.revokeObjectURL(url);
} catch (error) {
console.error('下载音频失败:', error);
message.error('下载音频失败');
}
};
const closeMeetingModal = () => {
setShowMeetingModal(false);
setMeetingDetails(null);
@ -225,6 +251,7 @@ export default function useAdminDashboardPage() {
handleKickUser,
handleViewMeeting,
handleDownloadTranscript,
handleDownloadAudio,
closeMeetingModal,
taskCompletionRate,
};

View File

@ -55,6 +55,16 @@ const STATUS_MAP = {
};
const formatResourcePercent = (value) => `${Number(value || 0).toFixed(1)}%`;
// Render an audio duration (seconds) as "X分Y秒".
// Nullish / missing values yield a placeholder; 0 is a valid duration.
const formatAudioDuration = (duration) => {
  const hasValue = Boolean(duration) || duration === 0;
  if (!hasValue) {
    return '无时长信息';
  }
  const numeric = Number(duration) || 0;
  // Clamp negatives to zero and truncate fractional seconds.
  const totalSeconds = numeric > 0 ? Math.floor(numeric) : 0;
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}分${seconds}秒`;
};
const AdminDashboard = () => {
const {
@ -80,6 +90,7 @@ const AdminDashboard = () => {
handleKickUser,
handleViewMeeting,
handleDownloadTranscript,
handleDownloadAudio,
closeMeetingModal,
taskCompletionRate,
} = useAdminDashboardPage();
@ -167,6 +178,13 @@ const AdminDashboard = () => {
</Space>
),
},
{
title: '关联账号',
dataIndex: 'creator_name',
key: 'creator_name',
width: 140,
render: (text) => text || '-',
},
{
title: '状态',
dataIndex: 'status',
@ -384,14 +402,25 @@ const AdminDashboard = () => {
) : meetingDetails ? (
<Descriptions bordered column={1} size="small">
<Descriptions.Item label="会议名称">{meetingDetails.title}</Descriptions.Item>
<Descriptions.Item label="关联账号">{meetingDetails.creator_account || '-'}</Descriptions.Item>
<Descriptions.Item label="开始时间">
{meetingDetails.meeting_time ? new Date(meetingDetails.meeting_time).toLocaleString() : '-'}
</Descriptions.Item>
<Descriptions.Item label="使用模版">{meetingDetails.prompt_name || '默认模版'}</Descriptions.Item>
<Descriptions.Item label="音频信息">
{meetingDetails.audio_duration
? `${Math.floor(meetingDetails.audio_duration / 60)}${Math.floor(meetingDetails.audio_duration % 60)}`
: '无时长信息'}
<Space size="middle">
<span>{formatAudioDuration(meetingDetails.audio_duration)}</span>
{meetingDetails.audio_file_path ? (
<Button
type="link"
size="small"
style={{ padding: 0 }}
onClick={() => handleDownloadAudio(meetingDetails.meeting_id, meetingDetails.audio_file_path)}
>
下载音频
</Button>
) : null}
</Space>
</Descriptions.Item>
<Descriptions.Item label="操作">
<ActionButton tone="view" variant="textLg" icon={<FileTextOutlined />} onClick={() => handleDownloadTranscript(meetingDetails.meeting_id)}>

View File

@ -165,6 +165,7 @@ const ModelManagement = () => {
provider: values.provider,
endpoint_url: values.endpoint_url,
api_key: values.api_key,
request_timeout_seconds: values.request_timeout_seconds,
hot_word_group_id: values.hot_word_group_id || null,
extra_config: extraConfig,
description: values.description,
@ -233,6 +234,7 @@ const ModelManagement = () => {
endpoint_url: 'https://dashscope.aliyuncs.com/api/v1/services/audio/asr/transcription',
api_key: '',
service_model_name: 'paraformer-v2',
request_timeout_seconds: 300,
hot_word_group_id: undefined,
asr_speaker_count: 10,
asr_language_hints: 'zh,en',
@ -267,24 +269,25 @@ const ModelManagement = () => {
const extraConfig = normalizeAudioExtraConfig(row);
form.setFieldsValue({
...row,
request_timeout_seconds: row.request_timeout_seconds ?? 300,
hot_word_group_id: row.hot_word_group_id || undefined,
service_model_name: row.service_model_name || extraConfig.model || row.asr_model_name || row.model_name,
asr_speaker_count: extraConfig.speaker_count ?? row.asr_speaker_count,
service_model_name: row.service_model_name || extraConfig.model || row.model_name,
asr_speaker_count: extraConfig.speaker_count,
asr_language_hints: Array.isArray(extraConfig.language_hints)
? extraConfig.language_hints.join(',')
: extraConfig.language_hints || row.asr_language_hints,
asr_disfluency_removal_enabled: extraConfig.disfluency_removal_enabled ?? Boolean(row.asr_disfluency_removal_enabled),
asr_diarization_enabled: extraConfig.diarization_enabled ?? Boolean(row.asr_diarization_enabled),
: extraConfig.language_hints,
asr_disfluency_removal_enabled: extraConfig.disfluency_removal_enabled ?? false,
asr_diarization_enabled: extraConfig.diarization_enabled ?? false,
asr_timestamp_alignment_enabled: extraConfig.timestamp_alignment_enabled ?? false,
asr_channel_id: Array.isArray(extraConfig.channel_id) ? extraConfig.channel_id.join(',') : extraConfig.channel_id,
asr_special_word_filter: extraConfig.special_word_filter,
asr_audio_event_detection_enabled: extraConfig.audio_event_detection_enabled ?? false,
asr_phrase_id: extraConfig.phrase_id,
vp_template_text: extraConfig.template_text ?? row.vp_template_text,
vp_duration_seconds: extraConfig.duration_seconds ?? row.vp_duration_seconds,
vp_sample_rate: extraConfig.sample_rate ?? row.vp_sample_rate,
vp_channels: extraConfig.channels ?? row.vp_channels,
vp_max_size_bytes: extraConfig.max_size_bytes ?? row.vp_max_size_bytes,
vp_template_text: extraConfig.template_text,
vp_duration_seconds: extraConfig.duration_seconds,
vp_sample_rate: extraConfig.sample_rate,
vp_channels: extraConfig.channels,
vp_max_size_bytes: extraConfig.max_size_bytes,
});
}
setDrawerOpen(true);
@ -419,10 +422,10 @@ const ModelManagement = () => {
key: 'core',
render: (_, row) => {
const extraConfig = normalizeAudioExtraConfig(row);
const serviceModelName = row.service_model_name || extraConfig.model || row.asr_model_name || row.model_name;
const serviceModelName = row.service_model_name || extraConfig.model || row.model_name;
return row.audio_scene === 'voiceprint'
? `模型=${serviceModelName || '-'} 时长=${extraConfig.duration_seconds || row.vp_duration_seconds || '-'}s 采样=${extraConfig.sample_rate || row.vp_sample_rate || '-'}`
: `模型=${serviceModelName || '-'} 热词组=${row.hot_word_group_name || '未关联'}`;
? `模型=${serviceModelName || '-'} 时长=${extraConfig.duration_seconds || '-'}s 采样=${extraConfig.sample_rate || '-'}`
: `模型=${serviceModelName || '-'} 超时=${row.request_timeout_seconds || 300}s 热词组=${row.hot_word_group_name || '未关联'}`;
},
},
{ title: '状态', dataIndex: 'is_active', key: 'is_active', width: 90, render: (v) => <StatusTag active={v} /> },
@ -628,6 +631,9 @@ const ModelManagement = () => {
</>
) : (
<>
<Form.Item name="request_timeout_seconds" label="转录超时(秒)" rules={[{ required: true, message: '请输入超时秒数' }]}>
<InputNumber min={10} max={3600} />
</Form.Item>
<Form.Item name="hot_word_group_id" label="热词组">
<Select
allowClear