From 3fe28934cc60254791d324932212ad0f823aa076 Mon Sep 17 00:00:00 2001 From: "mula.liu" Date: Thu, 9 Apr 2026 17:51:34 +0800 Subject: [PATCH] v1.1.1 --- DOCKER_README.md | 8 + backend/Dockerfile | 1 + backend/app/api/endpoints/admin_settings.py | 12 +- backend/app/api/endpoints/audio.py | 48 ++++- backend/app/api/endpoints/hot_words.py | 8 +- backend/app/models/models.py | 1 + .../app/services/admin_settings_service.py | 144 ++------------ .../services/async_transcription_service.py | 68 ++++++- .../app/services/audio_preprocess_service.py | 188 ++++++++++++++++++ backend/app/services/meeting_service.py | 59 ++++-- backend/app/services/system_config_service.py | 66 ++---- backend/app/utils/audio_parser.py | 37 +++- backend/requirements.txt | 2 - ...model_config_and_drop_legacy_ai_tables.sql | 105 ++++++++++ deploy.md | 7 +- frontend/src/hooks/useAdminDashboardPage.js | 29 ++- frontend/src/pages/AdminDashboard.jsx | 35 +++- frontend/src/pages/admin/ModelManagement.jsx | 32 +-- 18 files changed, 590 insertions(+), 260 deletions(-) create mode 100644 backend/app/services/audio_preprocess_service.py create mode 100644 backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql diff --git a/DOCKER_README.md b/DOCKER_README.md index 17501db..05da3a7 100644 --- a/DOCKER_README.md +++ b/DOCKER_README.md @@ -52,6 +52,9 @@ vim .env # 配置七牛云、LLM密钥等 - ✅ 启动所有服务 - ✅ 等待健康检查 +说明: +- 后端镜像现在依赖系统级 `ffmpeg/ffprobe` 做音频预处理,已在 `backend/Dockerfile` 中安装,无需宿主机额外安装。 + ### 方式二:手动启动 ```bash @@ -119,6 +122,11 @@ HTTPS_PORT=443 # HTTP_PORT=80 ``` +### 音频预处理依赖 + +- Docker 部署:后端容器内已安装 `ffmpeg` +- 非 Docker 部署:请确保服务器可执行 `ffmpeg` 和 `ffprobe` + ## 📦 数据目录 所有数据存储在 `./data/` 目录: diff --git a/backend/Dockerfile b/backend/Dockerfile index 762e769..8f280f2 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -19,6 +19,7 @@ COPY requirements.txt . RUN apt-get update && apt-get install -y \ gcc \ curl \ + ffmpeg \ default-libmysqlclient-dev \ pkg-config \ && pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements.txt \ diff --git a/backend/app/api/endpoints/admin_settings.py b/backend/app/api/endpoints/admin_settings.py index 89065e3..7e3108e 100644 --- a/backend/app/api/endpoints/admin_settings.py +++ b/backend/app/api/endpoints/admin_settings.py @@ -44,19 +44,9 @@ class AudioModelUpsertRequest(BaseModel): provider: str | None = None endpoint_url: str | None = None api_key: str | None = None + request_timeout_seconds: int = 300 extra_config: dict[str, Any] | None = None - asr_model_name: str | None = None - asr_vocabulary_id: str | None = None hot_word_group_id: int | None = None - asr_speaker_count: int | None = None - asr_language_hints: str | None = None - asr_disfluency_removal_enabled: bool | None = None - asr_diarization_enabled: bool | None = None - vp_template_text: str | None = None - vp_duration_seconds: int | None = None - vp_sample_rate: int | None = None - vp_channels: int | None = None - vp_max_size_bytes: int | None = None description: str | None = None is_active: bool = True is_default: bool = False diff --git a/backend/app/api/endpoints/audio.py b/backend/app/api/endpoints/audio.py index aaef6ba..ad9ee59 100644 --- a/backend/app/api/endpoints/audio.py +++ b/backend/app/api/endpoints/audio.py @@ -5,8 +5,8 @@ from app.core.auth import get_current_user from app.core.response import create_api_response from app.services.async_transcription_service import AsyncTranscriptionService from app.services.async_meeting_service import async_meeting_service +from app.services.audio_preprocess_service import audio_preprocess_service from app.services.audio_service import handle_audio_upload -from app.utils.audio_parser import get_audio_duration from pydantic import BaseModel from typing import Optional, List from datetime import datetime, timedelta @@ -456,18 +456,30 @@ async def complete_upload( } ) - # 6. 获取文件信息 + # 6. 对合并后的音频执行统一预处理 full_path = BASE_DIR / file_path.lstrip('/') - file_size = full_path.stat().st_size - file_name = full_path.name - - # 6.5 获取音频时长 - audio_duration = 0 try: - audio_duration = get_audio_duration(str(full_path)) - print(f"音频时长: {audio_duration}秒") + preprocess_result = audio_preprocess_service.preprocess(full_path) + processed_full_path = preprocess_result.file_path + file_size = preprocess_result.file_size + file_name = preprocess_result.file_name + audio_duration = preprocess_result.metadata.duration_seconds + file_path = f"/{processed_full_path.relative_to(BASE_DIR)}" + print( + f"流式上传音频预处理完成: source={full_path.name}, " + f"target={processed_full_path.name}, duration={audio_duration}s, " + f"applied={preprocess_result.applied}" + ) except Exception as e: - print(f"警告: 获取音频时长失败,但不影响后续流程: {e}") + if full_path.exists(): + try: + os.remove(full_path) + except OSError: + pass + return create_api_response( + code="500", + message=f"音频预处理失败: {str(e)}" + ) # 7. 调用 audio_service 处理文件(数据库更新、启动转录和总结) result = handle_audio_upload( @@ -484,8 +496,24 @@ async def complete_upload( # 如果处理失败,返回错误 if not result["success"]: + cleanup_paths = [processed_full_path] + if processed_full_path != full_path: + cleanup_paths.append(full_path) + + for cleanup_path in cleanup_paths: + if cleanup_path.exists(): + try: + os.remove(cleanup_path) + except OSError: + pass return result["response"] + if preprocess_result.applied and processed_full_path != full_path and full_path.exists(): + try: + os.remove(full_path) + except OSError: + pass + # 8. 返回成功响应 transcription_task_id = result["transcription_task_id"] message_suffix = "" diff --git a/backend/app/api/endpoints/hot_words.py b/backend/app/api/endpoints/hot_words.py index 28aef91..3829eff 100644 --- a/backend/app/api/endpoints/hot_words.py +++ b/backend/app/api/endpoints/hot_words.py @@ -117,7 +117,6 @@ async def delete_group(id: int, current_user: dict = Depends(get_current_admin_u """ UPDATE audio_model_config SET hot_word_group_id = NULL, - asr_vocabulary_id = NULL, extra_config = JSON_REMOVE(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id') WHERE hot_word_group_id = %s """, @@ -190,15 +189,14 @@ async def sync_group(id: int, current_user: dict = Depends(get_current_admin_use (vocab_id, id), ) - # 更新关联该组的所有 audio_model_config.asr_vocabulary_id + # 更新关联该组的所有 audio_model_config.extra_config.vocabulary_id cursor.execute( """ UPDATE audio_model_config - SET asr_vocabulary_id = %s, - extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s) + SET extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s) WHERE hot_word_group_id = %s """, - (vocab_id, vocab_id, id), + (vocab_id, id), ) conn.commit() diff --git a/backend/app/models/models.py b/backend/app/models/models.py index 1938628..21fb960 100644 --- a/backend/app/models/models.py +++ b/backend/app/models/models.py @@ -76,6 +76,7 @@ class Meeting(BaseModel): description: Optional[str] = None creator_id: int creator_username: str + creator_account: Optional[str] = None created_at: datetime.datetime attendees: List[AttendeeInfo] attendee_ids: Optional[List[int]] = None diff --git a/backend/app/services/admin_settings_service.py b/backend/app/services/admin_settings_service.py index 9e6bc28..5c1e49a 100644 --- a/backend/app/services/admin_settings_service.py +++ b/backend/app/services/admin_settings_service.py @@ -92,24 +92,12 @@ def _merge_audio_extra_config(request, vocabulary_id: str | None = None) -> dict extra_config = _parse_json_object(request.extra_config) if request.audio_scene == "asr": - legacy_config = { - "model": request.asr_model_name, - "speaker_count": request.asr_speaker_count, - "language_hints": request.asr_language_hints, - "disfluency_removal_enabled": request.asr_disfluency_removal_enabled, - "diarization_enabled": request.asr_diarization_enabled, - } + if vocabulary_id: + extra_config["vocabulary_id"] = vocabulary_id else: - legacy_config = { - "model": request.model_name, - "template_text": request.vp_template_text, - "duration_seconds": request.vp_duration_seconds, - "sample_rate": request.vp_sample_rate, - "channels": request.vp_channels, - "max_size_bytes": request.vp_max_size_bytes, - } + extra_config.pop("vocabulary_id", None) - merged = {**legacy_config, **extra_config} + merged = dict(extra_config) language_hints = _normalize_string_list(merged.get("language_hints")) if language_hints is not None: @@ -119,94 +107,20 @@ def _merge_audio_extra_config(request, vocabulary_id: str | None = None) -> dict if channel_id is not None: merged["channel_id"] = channel_id - resolved_vocabulary_id = vocabulary_id or merged.get("vocabulary_id") or request.asr_vocabulary_id - if request.audio_scene == "asr" and resolved_vocabulary_id: - merged["vocabulary_id"] = resolved_vocabulary_id - return _clean_extra_config(merged) -def _extract_legacy_audio_columns(audio_scene: str, extra_config: dict[str, Any]) -> dict[str, Any]: - extra_config = _parse_json_object(extra_config) - columns = { - "asr_model_name": None, - "asr_vocabulary_id": None, - "asr_speaker_count": None, - "asr_language_hints": None, - "asr_disfluency_removal_enabled": None, - "asr_diarization_enabled": None, - "vp_template_text": None, - "vp_duration_seconds": None, - "vp_sample_rate": None, - "vp_channels": None, - "vp_max_size_bytes": None, - } - - if audio_scene == "asr": - language_hints = extra_config.get("language_hints") - if isinstance(language_hints, list): - language_hints = ",".join(str(item).strip() for item in language_hints if str(item).strip()) - columns.update( - { - "asr_model_name": extra_config.get("model"), - "asr_vocabulary_id": extra_config.get("vocabulary_id"), - "asr_speaker_count": extra_config.get("speaker_count"), - "asr_language_hints": language_hints, - "asr_disfluency_removal_enabled": 1 if extra_config.get("disfluency_removal_enabled") is True else 0 if extra_config.get("disfluency_removal_enabled") is False else None, - "asr_diarization_enabled": 1 if extra_config.get("diarization_enabled") is True else 0 if extra_config.get("diarization_enabled") is False else None, - } - ) - else: - columns.update( - { - "vp_template_text": extra_config.get("template_text"), - "vp_duration_seconds": extra_config.get("duration_seconds"), - "vp_sample_rate": extra_config.get("sample_rate"), - "vp_channels": extra_config.get("channels"), - "vp_max_size_bytes": extra_config.get("max_size_bytes"), - } - ) - - return columns - - def _normalize_audio_row(row: dict[str, Any]) -> dict[str, Any]: extra_config = _parse_json_object(row.get("extra_config")) - if row.get("audio_scene") == "asr": - if extra_config.get("model") is None and row.get("asr_model_name") is not None: - extra_config["model"] = row["asr_model_name"] - if extra_config.get("vocabulary_id") is None and row.get("asr_vocabulary_id") is not None: - extra_config["vocabulary_id"] = row["asr_vocabulary_id"] - if extra_config.get("speaker_count") is None and row.get("asr_speaker_count") is not None: - extra_config["speaker_count"] = row["asr_speaker_count"] - if extra_config.get("language_hints") is None and row.get("asr_language_hints"): - extra_config["language_hints"] = _normalize_string_list(row["asr_language_hints"]) - if extra_config.get("disfluency_removal_enabled") is None and row.get("asr_disfluency_removal_enabled") is not None: - extra_config["disfluency_removal_enabled"] = bool(row["asr_disfluency_removal_enabled"]) - if extra_config.get("diarization_enabled") is None and row.get("asr_diarization_enabled") is not None: - extra_config["diarization_enabled"] = bool(row["asr_diarization_enabled"]) - else: - if extra_config.get("model") is None and row.get("model_name"): - extra_config["model"] = row["model_name"] - if extra_config.get("template_text") is None and row.get("vp_template_text") is not None: - extra_config["template_text"] = row["vp_template_text"] - if extra_config.get("duration_seconds") is None and row.get("vp_duration_seconds") is not None: - extra_config["duration_seconds"] = row["vp_duration_seconds"] - if extra_config.get("sample_rate") is None and row.get("vp_sample_rate") is not None: - extra_config["sample_rate"] = row["vp_sample_rate"] - if extra_config.get("channels") is None and row.get("vp_channels") is not None: - extra_config["channels"] = row["vp_channels"] - if extra_config.get("max_size_bytes") is None and row.get("vp_max_size_bytes") is not None: - extra_config["max_size_bytes"] = row["vp_max_size_bytes"] - row["extra_config"] = extra_config row["service_model_name"] = extra_config.get("model") + row["request_timeout_seconds"] = int(row.get("request_timeout_seconds") or 300) return row def _resolve_hot_word_vocabulary_id(cursor, request) -> str | None: - vocabulary_id = request.asr_vocabulary_id + vocabulary_id = _parse_json_object(request.extra_config).get("vocabulary_id") if request.hot_word_group_id: cursor.execute("SELECT vocabulary_id FROM hot_word_group WHERE id = %s", (request.hot_word_group_id,)) group_row = cursor.fetchone() @@ -482,10 +396,8 @@ def list_audio_model_configs(scene: str = "all"): cursor = conn.cursor(dictionary=True) sql = """ SELECT a.config_id, a.model_code, a.model_name, a.audio_scene, a.provider, a.endpoint_url, a.api_key, - a.asr_model_name, a.asr_vocabulary_id, a.hot_word_group_id, a.asr_speaker_count, a.asr_language_hints, - a.asr_disfluency_removal_enabled, a.asr_diarization_enabled, - a.vp_template_text, a.vp_duration_seconds, a.vp_sample_rate, a.vp_channels, a.vp_max_size_bytes, - a.extra_config, a.description, a.is_active, a.is_default, a.created_at, a.updated_at, + a.request_timeout_seconds, a.hot_word_group_id, a.extra_config, + a.description, a.is_active, a.is_default, a.created_at, a.updated_at, g.name AS hot_word_group_name, g.vocabulary_id AS hot_word_group_vocab_id FROM audio_model_config a LEFT JOIN hot_word_group g ON g.id = a.hot_word_group_id @@ -524,17 +436,13 @@ def create_audio_model_config(request): asr_vocabulary_id = _resolve_hot_word_vocabulary_id(cursor, request) extra_config = _merge_audio_extra_config(request, vocabulary_id=asr_vocabulary_id) - legacy_columns = _extract_legacy_audio_columns(request.audio_scene, extra_config) cursor.execute( """ INSERT INTO audio_model_config (model_code, model_name, audio_scene, provider, endpoint_url, api_key, - asr_model_name, asr_vocabulary_id, hot_word_group_id, asr_speaker_count, asr_language_hints, - asr_disfluency_removal_enabled, asr_diarization_enabled, - vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes, - extra_config, description, is_active, is_default) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + request_timeout_seconds, hot_word_group_id, extra_config, description, is_active, is_default) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """, ( request.model_code, @@ -543,18 +451,8 @@ def create_audio_model_config(request): request.provider, request.endpoint_url, request.api_key, - legacy_columns["asr_model_name"], - legacy_columns["asr_vocabulary_id"], + request.request_timeout_seconds, request.hot_word_group_id, - legacy_columns["asr_speaker_count"], - legacy_columns["asr_language_hints"], - legacy_columns["asr_disfluency_removal_enabled"], - legacy_columns["asr_diarization_enabled"], - legacy_columns["vp_template_text"], - legacy_columns["vp_duration_seconds"], - legacy_columns["vp_sample_rate"], - legacy_columns["vp_channels"], - legacy_columns["vp_max_size_bytes"], json.dumps(extra_config, ensure_ascii=False), request.description, 1 if request.is_active else 0, @@ -594,16 +492,13 @@ def update_audio_model_config(model_code: str, request): asr_vocabulary_id = _resolve_hot_word_vocabulary_id(cursor, request) extra_config = _merge_audio_extra_config(request, vocabulary_id=asr_vocabulary_id) - legacy_columns = _extract_legacy_audio_columns(request.audio_scene, extra_config) cursor.execute( """ UPDATE audio_model_config SET model_code = %s, model_name = %s, audio_scene = %s, provider = %s, endpoint_url = %s, api_key = %s, - asr_model_name = %s, asr_vocabulary_id = %s, hot_word_group_id = %s, asr_speaker_count = %s, asr_language_hints = %s, - asr_disfluency_removal_enabled = %s, asr_diarization_enabled = %s, - vp_template_text = %s, vp_duration_seconds = %s, vp_sample_rate = %s, vp_channels = %s, vp_max_size_bytes = %s, - extra_config = %s, description = %s, is_active = %s, is_default = %s + request_timeout_seconds = %s, hot_word_group_id = %s, extra_config = %s, + description = %s, is_active = %s, is_default = %s WHERE model_code = %s """, ( @@ -613,18 +508,8 @@ def update_audio_model_config(model_code: str, request): request.provider, request.endpoint_url, request.api_key, - legacy_columns["asr_model_name"], - legacy_columns["asr_vocabulary_id"], + request.request_timeout_seconds, request.hot_word_group_id, - legacy_columns["asr_speaker_count"], - legacy_columns["asr_language_hints"], - legacy_columns["asr_disfluency_removal_enabled"], - legacy_columns["asr_diarization_enabled"], - legacy_columns["vp_template_text"], - legacy_columns["vp_duration_seconds"], - legacy_columns["vp_sample_rate"], - legacy_columns["vp_channels"], - legacy_columns["vp_max_size_bytes"], json.dumps(extra_config, ensure_ascii=False), request.description, 1 if request.is_active else 0, @@ -693,6 +578,7 @@ def test_audio_model_config(request): "api_key": request.api_key, "audio_scene": request.audio_scene, "hot_word_group_id": request.hot_word_group_id, + "request_timeout_seconds": request.request_timeout_seconds, **extra_config, } result = transcription_service.test_asr_model(runtime_config, test_file_url=request.test_file_url) diff --git a/backend/app/services/async_transcription_service.py b/backend/app/services/async_transcription_service.py index 42ccdac..02ab13a 100644 --- a/backend/app/services/async_transcription_service.py +++ b/backend/app/services/async_transcription_service.py @@ -14,6 +14,19 @@ from app.core.database import get_db_connection from app.services.system_config_service import SystemConfigService +class _DefaultTimeoutSession(requests.Session): + """为 requests.Session 注入默认超时。""" + + def __init__(self, default_timeout: Optional[int] = None): + super().__init__() + self.default_timeout = default_timeout + + def request(self, method, url, **kwargs): + if "timeout" not in kwargs and self.default_timeout: + kwargs["timeout"] = self.default_timeout + return super().request(method, url, **kwargs) + + class AsyncTranscriptionService: """异步转录服务类""" @@ -23,8 +36,8 @@ class AsyncTranscriptionService: self.base_url = APP_CONFIG['base_url'] @staticmethod - def _create_requests_session() -> requests.Session: - session = requests.Session() + def _create_requests_session(default_timeout: Optional[int] = None) -> requests.Session: + session = _DefaultTimeoutSession(default_timeout=default_timeout) session.trust_env = os.getenv("IMEETING_USE_SYSTEM_PROXY", "").lower() in {"1", "true", "yes", "on"} return session @@ -57,6 +70,35 @@ class AsyncTranscriptionService: request_options["base_address"] = base_address return request_options + @staticmethod + def _resolve_request_timeout_seconds(audio_config: Optional[Dict[str, Any]] = None) -> int: + value = (audio_config or {}).get("request_timeout_seconds") + try: + timeout_seconds = int(value) + except (TypeError, ValueError): + timeout_seconds = 300 + return max(10, timeout_seconds) + + def _dashscope_async_call(self, request_options: Dict[str, Any], call_params: Dict[str, Any], timeout_seconds: int): + session = self._create_requests_session(timeout_seconds) + try: + try: + return Transcription.async_call(session=session, **request_options, **call_params) + except TypeError: + return Transcription.async_call(**request_options, **call_params) + finally: + session.close() + + def _dashscope_fetch(self, paraformer_task_id: str, request_options: Dict[str, Any], timeout_seconds: int): + session = self._create_requests_session(timeout_seconds) + try: + try: + return Transcription.fetch(task=paraformer_task_id, session=session, **request_options) + except TypeError: + return Transcription.fetch(task=paraformer_task_id, **request_options) + finally: + session.close() + @staticmethod def _build_dashscope_call_params(audio_config: Dict[str, Any], file_url: str) -> Dict[str, Any]: model_name = audio_config.get("model") or "paraformer-v2" @@ -93,13 +135,14 @@ class AsyncTranscriptionService: raise Exception(f"当前仅支持 DashScope 音频识别测试,暂不支持供应商: {provider}") request_options = self._build_dashscope_request_options(audio_config) + timeout_seconds = self._resolve_request_timeout_seconds(audio_config) dashscope.api_key = request_options["api_key"] target_file_url = ( test_file_url or "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav" ) call_params = self._build_dashscope_call_params(audio_config, target_file_url) - response = Transcription.async_call(**request_options, **call_params) + response = self._dashscope_async_call(request_options, call_params, timeout_seconds) if response.status_code != HTTPStatus.OK: raise Exception(response.message or "音频模型测试失败") @@ -154,6 +197,7 @@ class AsyncTranscriptionService: raise Exception(f"当前仅支持 DashScope 音频识别,暂不支持供应商: {provider}") request_options = self._build_dashscope_request_options(audio_config) + timeout_seconds = self._resolve_request_timeout_seconds(audio_config) dashscope.api_key = request_options["api_key"] call_params = self._build_dashscope_call_params(audio_config, file_url) @@ -164,7 +208,7 @@ class AsyncTranscriptionService: ) # 3. 调用Paraformer异步API - task_response = Transcription.async_call(**request_options, **call_params) + task_response = self._dashscope_async_call(request_options, call_params, timeout_seconds) if task_response.status_code != HTTPStatus.OK: print(f"Failed to start transcription: {task_response.status_code}, {task_response.message}") @@ -238,11 +282,11 @@ class AsyncTranscriptionService: # 2. 查询外部API获取状态 try: - request_options = self._build_dashscope_request_options( - SystemConfigService.get_active_audio_model_config("asr") - ) + audio_config = SystemConfigService.get_active_audio_model_config("asr") + request_options = self._build_dashscope_request_options(audio_config) + timeout_seconds = self._resolve_request_timeout_seconds(audio_config) dashscope.api_key = request_options["api_key"] - paraformer_response = Transcription.fetch(task=paraformer_task_id, **request_options) + paraformer_response = self._dashscope_fetch(paraformer_task_id, request_options, timeout_seconds) if paraformer_response.status_code != HTTPStatus.OK: raise Exception(f"Failed to fetch task status from provider: {paraformer_response.message}") @@ -559,10 +603,12 @@ class AsyncTranscriptionService: transcription_url = paraformer_output['results'][0]['transcription_url'] print(f"Fetching transcription from URL: {transcription_url}") - - session = self._create_requests_session() + + audio_config = SystemConfigService.get_active_audio_model_config("asr") + timeout_seconds = self._resolve_request_timeout_seconds(audio_config) + session = self._create_requests_session(timeout_seconds) try: - response = session.get(transcription_url) + response = session.get(transcription_url, timeout=timeout_seconds) finally: session.close() response.raise_for_status() diff --git a/backend/app/services/audio_preprocess_service.py b/backend/app/services/audio_preprocess_service.py new file mode 100644 index 0000000..84c1a01 --- /dev/null +++ b/backend/app/services/audio_preprocess_service.py @@ -0,0 +1,188 @@ +""" +音频预处理服务 + +使用 ffprobe/ffmpeg 对上传音频做统一探测和规范化,降低长会议音频的格式兼容风险。 +当前阶段只做单文件预处理,不做拆片。 +""" +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Optional +import json +import shutil +import subprocess + +from app.utils.audio_parser import get_audio_duration + + +@dataclass +class AudioMetadata: + """音频元数据""" + + duration_seconds: int = 0 + sample_rate: Optional[int] = None + channels: Optional[int] = None + codec_name: Optional[str] = None + format_name: Optional[str] = None + bit_rate: Optional[int] = None + + +@dataclass +class AudioPreprocessResult: + """音频预处理结果""" + + file_path: Path + file_name: str + file_size: int + metadata: AudioMetadata + applied: bool = False + output_format: Optional[str] = None + + +class AudioPreprocessService: + """基于 ffmpeg 的音频预处理服务""" + + TARGET_EXTENSION = ".m4a" + TARGET_SAMPLE_RATE = 16000 + TARGET_CHANNELS = 1 + TARGET_BITRATE = "64k" + + def __init__(self): + self.ffmpeg_path = shutil.which("ffmpeg") + self.ffprobe_path = shutil.which("ffprobe") + + def probe_audio(self, file_path: str | Path) -> AudioMetadata: + """ + 使用 ffprobe 探测音频元数据。 + """ + path = Path(file_path) + if not path.exists(): + raise FileNotFoundError(f"音频文件不存在: {path}") + + if self.ffprobe_path: + metadata = self._probe_with_ffprobe(path) + if metadata: + return metadata + + return AudioMetadata(duration_seconds=get_audio_duration(str(path))) + + def preprocess(self, file_path: str | Path) -> AudioPreprocessResult: + """ + 预处理音频为统一格式。 + + 当前策略: + 1. 去除视频流,仅保留音频 + 2. 统一单声道 + 3. 统一采样率 16k + 4. 转为 m4a(aac) + """ + source_path = Path(file_path) + if not source_path.exists(): + raise FileNotFoundError(f"音频文件不存在: {source_path}") + + if not self.ffmpeg_path: + metadata = self.probe_audio(source_path) + return AudioPreprocessResult( + file_path=source_path, + file_name=source_path.name, + file_size=source_path.stat().st_size, + metadata=metadata, + applied=False, + output_format=source_path.suffix.lower().lstrip(".") or None, + ) + + output_path = source_path.with_name(f"{source_path.stem}_normalized{self.TARGET_EXTENSION}") + temp_output_path = output_path.with_name(f"{output_path.stem}.tmp{output_path.suffix}") + + command = [ + self.ffmpeg_path, + "-y", + "-i", + str(source_path), + "-vn", + "-ac", + str(self.TARGET_CHANNELS), + "-ar", + str(self.TARGET_SAMPLE_RATE), + "-c:a", + "aac", + "-b:a", + self.TARGET_BITRATE, + "-movflags", + "+faststart", + str(temp_output_path), + ] + + try: + completed = subprocess.run( + command, + check=False, + capture_output=True, + text=True, + ) + if completed.returncode != 0: + stderr = (completed.stderr or "").strip() + raise RuntimeError(stderr or "ffmpeg 预处理失败") + + temp_output_path.replace(output_path) + metadata = self.probe_audio(output_path) + return AudioPreprocessResult( + file_path=output_path, + file_name=output_path.name, + file_size=output_path.stat().st_size, + metadata=metadata, + applied=True, + output_format=output_path.suffix.lower().lstrip("."), + ) + finally: + if temp_output_path.exists(): + temp_output_path.unlink() + + def _probe_with_ffprobe(self, file_path: Path) -> Optional[AudioMetadata]: + command = [ + self.ffprobe_path, + "-v", + "error", + "-print_format", + "json", + "-show_streams", + "-show_format", + str(file_path), + ] + + try: + completed = subprocess.run( + command, + check=False, + capture_output=True, + text=True, + ) + if completed.returncode != 0 or not completed.stdout: + return None + + payload = json.loads(completed.stdout) + streams = payload.get("streams") or [] + audio_stream = next((stream for stream in streams if stream.get("codec_type") == "audio"), {}) + format_info = payload.get("format") or {} + + duration_value = audio_stream.get("duration") or format_info.get("duration") + duration_seconds = int(float(duration_value)) if duration_value else 0 + + sample_rate_value = audio_stream.get("sample_rate") + channels_value = audio_stream.get("channels") + bit_rate_value = audio_stream.get("bit_rate") or format_info.get("bit_rate") + + return AudioMetadata( + duration_seconds=duration_seconds, + sample_rate=int(sample_rate_value) if sample_rate_value else None, + channels=int(channels_value) if channels_value else None, + codec_name=audio_stream.get("codec_name"), + format_name=format_info.get("format_name"), + bit_rate=int(bit_rate_value) if bit_rate_value else None, + ) + except Exception: + return None + + +audio_preprocess_service = AudioPreprocessService() diff --git a/backend/app/services/meeting_service.py b/backend/app/services/meeting_service.py index 1b1a4c8..2185726 100644 --- a/backend/app/services/meeting_service.py +++ b/backend/app/services/meeting_service.py @@ -8,8 +8,8 @@ from app.services.llm_service import LLMService from app.services.async_transcription_service import AsyncTranscriptionService from app.services.async_meeting_service import async_meeting_service from app.services.audio_service import handle_audio_upload +from app.services.audio_preprocess_service import audio_preprocess_service from app.services.system_config_service import SystemConfigService -from app.utils.audio_parser import get_audio_duration from app.core.auth import get_current_user, get_optional_current_user from app.core.response import create_api_response from typing import Any, Dict, List, Optional @@ -479,7 +479,7 @@ def get_meeting_details(meeting_id: int, current_user: dict = Depends(get_curren cursor = connection.cursor(dictionary=True) query = ''' SELECT m.meeting_id, m.title, m.meeting_time, m.summary, m.created_at, m.tags, - m.user_id as creator_id, u.caption as creator_username, m.prompt_id, + m.user_id as creator_id, u.caption as creator_username, u.username as creator_account, m.prompt_id, af.file_path as audio_file_path, af.duration as audio_duration, p.name as prompt_name, m.access_password FROM meetings m @@ -505,7 +505,8 @@ def get_meeting_details(meeting_id: int, current_user: dict = Depends(get_curren meeting_id=meeting['meeting_id'], title=meeting['title'], meeting_time=meeting['meeting_time'], summary=meeting['summary'], created_at=meeting['created_at'], attendees=attendees, attendee_ids=[row['user_id'] for row in attendees_data], - creator_id=meeting['creator_id'], creator_username=meeting['creator_username'], tags=tags, + creator_id=meeting['creator_id'], creator_username=meeting['creator_username'], + creator_account=meeting.get('creator_account'), tags=tags, prompt_id=meeting.get('prompt_id'), prompt_name=meeting.get('prompt_name'), overall_status=overall_status.get('overall_status'), @@ -725,7 +726,6 @@ async def upload_audio( meeting_dir.mkdir(exist_ok=True) unique_filename = f"{uuid.uuid4()}{file_extension}" absolute_path = meeting_dir / unique_filename - relative_path = absolute_path.relative_to(BASE_DIR) try: with open(absolute_path, "wb") as buffer: @@ -733,17 +733,28 @@ async def upload_audio( except Exception as e: return create_api_response(code="500", message=f"保存文件失败: {str(e)}") - # 3.5 获取音频时长 - audio_duration = 0 + # 3.5 统一做音频预处理 try: - audio_duration = get_audio_duration(str(absolute_path)) - print(f"音频时长: {audio_duration}秒") + preprocess_result = audio_preprocess_service.preprocess(absolute_path) + processed_absolute_path = preprocess_result.file_path + audio_duration = preprocess_result.metadata.duration_seconds + print( + f"音频预处理完成: source={absolute_path.name}, " + f"target={processed_absolute_path.name}, duration={audio_duration}s, " + f"applied={preprocess_result.applied}" + ) except Exception as e: - print(f"警告: 获取音频时长失败,但不影响后续流程: {e}") + if absolute_path.exists(): + try: + os.remove(absolute_path) + except OSError: + pass + return create_api_response(code="500", message=f"音频预处理失败: {str(e)}") - file_path = '/' + str(relative_path) - file_name = audio_file.filename - file_size = audio_file.size + processed_relative_path = processed_absolute_path.relative_to(BASE_DIR) + file_path = '/' + str(processed_relative_path) + file_name = preprocess_result.file_name + file_size = preprocess_result.file_size # 4. 调用 audio_service 处理文件(权限检查、数据库更新、启动转录) result = handle_audio_upload( @@ -761,14 +772,26 @@ async def upload_audio( # 如果不成功,删除已保存的文件并返回错误 if not result["success"]: - if absolute_path.exists(): - try: - os.remove(absolute_path) - print(f"Deleted file due to processing error: {absolute_path}") - except Exception as e: - print(f"Warning: Failed to delete file {absolute_path}: {e}") + cleanup_paths = [processed_absolute_path] + if processed_absolute_path != absolute_path: + cleanup_paths.append(absolute_path) + + for cleanup_path in cleanup_paths: + if cleanup_path.exists(): + try: + os.remove(cleanup_path) + print(f"Deleted file due to processing error: {cleanup_path}") + except Exception as e: + print(f"Warning: Failed to delete file {cleanup_path}: {e}") return result["response"] + if preprocess_result.applied and processed_absolute_path != absolute_path and absolute_path.exists(): + try: + os.remove(absolute_path) + print(f"Deleted original uploaded audio after preprocessing: {absolute_path}") + except Exception as e: + print(f"Warning: Failed to delete original uploaded audio {absolute_path}: {e}") + # 5. 返回成功响应 transcription_task_id = result["transcription_task_id"] message_suffix = "" diff --git a/backend/app/services/system_config_service.py b/backend/app/services/system_config_service.py index a951596..73b1501 100644 --- a/backend/app/services/system_config_service.py +++ b/backend/app/services/system_config_service.py @@ -120,33 +120,8 @@ class SystemConfigService: cfg["audio_scene"] = audio_row["audio_scene"] if audio_row.get("hot_word_group_id") is not None: cfg["hot_word_group_id"] = audio_row["hot_word_group_id"] - - if audio_row.get("audio_scene") == "asr": - if extra_config.get("model") is None and audio_row.get("asr_model_name") is not None: - extra_config["model"] = audio_row["asr_model_name"] - if extra_config.get("vocabulary_id") is None and audio_row.get("asr_vocabulary_id") is not None: - extra_config["vocabulary_id"] = audio_row["asr_vocabulary_id"] - if extra_config.get("speaker_count") is None and audio_row.get("asr_speaker_count") is not None: - extra_config["speaker_count"] = audio_row["asr_speaker_count"] - if extra_config.get("language_hints") is None and audio_row.get("asr_language_hints"): - extra_config["language_hints"] = audio_row["asr_language_hints"] - if extra_config.get("disfluency_removal_enabled") is None and audio_row.get("asr_disfluency_removal_enabled") is not None: - extra_config["disfluency_removal_enabled"] = bool(audio_row["asr_disfluency_removal_enabled"]) - if extra_config.get("diarization_enabled") is None and audio_row.get("asr_diarization_enabled") is not None: - extra_config["diarization_enabled"] = bool(audio_row["asr_diarization_enabled"]) - else: - if extra_config.get("model") is None and audio_row.get("model_name"): - extra_config["model"] = audio_row["model_name"] - if extra_config.get("template_text") is None and audio_row.get("vp_template_text") is not None: - extra_config["template_text"] = audio_row["vp_template_text"] - if extra_config.get("duration_seconds") is None and audio_row.get("vp_duration_seconds") is not None: - extra_config["duration_seconds"] = audio_row["vp_duration_seconds"] - if extra_config.get("sample_rate") is None and audio_row.get("vp_sample_rate") is not None: - extra_config["sample_rate"] = audio_row["vp_sample_rate"] - if extra_config.get("channels") is None and audio_row.get("vp_channels") is not None: - extra_config["channels"] = audio_row["vp_channels"] - if extra_config.get("max_size_bytes") is None and audio_row.get("vp_max_size_bytes") is not None: - extra_config["max_size_bytes"] = audio_row["vp_max_size_bytes"] + if audio_row.get("request_timeout_seconds") is not None: + cfg["request_timeout_seconds"] = int(audio_row["request_timeout_seconds"]) language_hints = cls._normalize_string_list(extra_config.get("language_hints")) if language_hints is not None: @@ -162,11 +137,8 @@ class SystemConfigService: cursor = conn.cursor(dictionary=True) cursor.execute( """ - SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, hot_word_group_id, - asr_model_name, asr_vocabulary_id, asr_speaker_count, asr_language_hints, - asr_disfluency_removal_enabled, asr_diarization_enabled, - vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes, - extra_config + SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, + request_timeout_seconds, hot_word_group_id, extra_config FROM audio_model_config WHERE audio_scene = %s AND is_active = 1 ORDER BY is_default DESC, updated_at DESC, config_id ASC @@ -260,11 +232,8 @@ class SystemConfigService: cursor.execute( """ - SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, hot_word_group_id, - asr_model_name, asr_vocabulary_id, asr_speaker_count, asr_language_hints, - asr_disfluency_removal_enabled, asr_diarization_enabled, - vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes, - extra_config + SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, + request_timeout_seconds, hot_word_group_id, extra_config FROM audio_model_config WHERE model_code = %s AND is_active = 1 ORDER BY is_default DESC, config_id ASC @@ -427,28 +396,30 @@ class SystemConfigService: cursor.execute( """ INSERT INTO audio_model_config - (model_code, model_name, audio_scene, provider, asr_model_name, asr_vocabulary_id, asr_speaker_count, - asr_language_hints, asr_disfluency_removal_enabled, asr_diarization_enabled, description, is_active, is_default) + (model_code, model_name, audio_scene, provider, request_timeout_seconds, extra_config, description, is_active, is_default) VALUES ( 'audio_model', '音频识别模型', 'asr', 'dashscope', - 'paraformer-v2', - %s, - 10, - 'zh,en', - 1, - 1, + 300, + JSON_OBJECT( + 'model', 'paraformer-v2', + 'vocabulary_id', %s, + 'speaker_count', 10, + 'language_hints', JSON_ARRAY('zh', 'en'), + 'disfluency_removal_enabled', TRUE, + 'diarization_enabled', TRUE + ), '语音识别模型配置', 1, 1 ) ON DUPLICATE KEY UPDATE - asr_vocabulary_id = VALUES(asr_vocabulary_id), + extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s), is_active = 1 """, - (str(value),), + (str(value), str(value)), ) conn.commit() cursor.close() @@ -626,7 +597,6 @@ class SystemConfigService: audio_cfg = cls.get_active_audio_model_config("asr") if audio_cfg.get("vocabulary_id"): return audio_cfg["vocabulary_id"] - # 回退:直接读 audio_model_config.asr_vocabulary_id audio_vocab = cls.get_config_attribute('audio_model', 'vocabulary_id') if audio_vocab: return audio_vocab diff --git a/backend/app/utils/audio_parser.py b/backend/app/utils/audio_parser.py index 724feea..ddb4a35 100644 --- a/backend/app/utils/audio_parser.py +++ b/backend/app/utils/audio_parser.py @@ -3,15 +3,16 @@ 用于解析音频文件的元数据信息,如时长、采样率、编码格式等 """ - -from tinytag import TinyTag +import json +import shutil +import subprocess def get_audio_duration(file_path: str) -> int: """ 获取音频文件时长(秒) - 使用TinyTag读取音频文件时长 + 使用 ffprobe 读取音频时长 Args: file_path: 音频文件的完整路径 @@ -26,13 +27,33 @@ def get_audio_duration(file_path: str) -> int: - WAV (.wav) - OGG (.ogg) - FLAC (.flac) - - 以及TinyTag支持的其他音频格式 + - 以及 ffprobe 支持的其他音频格式 """ + ffprobe_path = shutil.which("ffprobe") + if not ffprobe_path: + return 0 + try: - tag = TinyTag.get(file_path) - if tag.duration and tag.duration > 0: - return int(tag.duration) + completed = subprocess.run( + [ + ffprobe_path, + "-v", + "error", + "-print_format", + "json", + "-show_format", + str(file_path), + ], + check=False, + capture_output=True, + text=True, + ) + if completed.returncode == 0 and completed.stdout: + payload = json.loads(completed.stdout) + duration_value = (payload.get("format") or {}).get("duration") + if duration_value: + return int(float(duration_value)) except Exception as e: - print(f"获取音频时长失败 ({file_path}): {e}") + print(f"ffprobe 获取音频时长失败 ({file_path}): {e}") return 0 diff --git a/backend/requirements.txt b/backend/requirements.txt index 2d8078f..073c5f3 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -22,6 +22,4 @@ psutil # APK Parsing pyaxmlparser -# Audio Metadata -tinytag python-dotenv diff --git a/backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql b/backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql new file mode 100644 index 0000000..efb3fc0 --- /dev/null +++ b/backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql @@ -0,0 +1,105 @@ +SET @request_timeout_exists := ( + SELECT COUNT(*) + FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = DATABASE() + AND TABLE_NAME = 'audio_model_config' + AND COLUMN_NAME = 'request_timeout_seconds' +); +SET @sql := IF( + @request_timeout_exists = 0, + 'ALTER TABLE `audio_model_config` ADD COLUMN `request_timeout_seconds` int(11) NOT NULL DEFAULT 300 COMMENT ''音频转录请求超时(秒)'' AFTER `api_key`', + 'SELECT 1' +); +PREPARE stmt FROM @sql; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +SET @has_asr_legacy := ( + SELECT COUNT(*) + FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = DATABASE() + AND TABLE_NAME = 'audio_model_config' + AND COLUMN_NAME = 'asr_model_name' +); +SET @sql := IF( + @has_asr_legacy > 0, + 'UPDATE `audio_model_config` + SET `extra_config` = JSON_SET( + COALESCE(`extra_config`, JSON_OBJECT()), + ''$.model'', `asr_model_name`, + ''$.vocabulary_id'', `asr_vocabulary_id`, + ''$.speaker_count'', `asr_speaker_count`, + ''$.language_hints'', `asr_language_hints`, + ''$.disfluency_removal_enabled'', `asr_disfluency_removal_enabled`, + ''$.diarization_enabled'', `asr_diarization_enabled` + ) + WHERE `audio_scene` = ''asr''', + 'SELECT 1' +); +PREPARE stmt FROM @sql; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +SET @has_voiceprint_legacy := ( + SELECT COUNT(*) + FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = DATABASE() + AND TABLE_NAME = 'audio_model_config' + AND COLUMN_NAME = 'vp_template_text' +); +SET @sql := IF( + @has_voiceprint_legacy > 0, + 'UPDATE `audio_model_config` + SET `extra_config` = JSON_SET( + COALESCE(`extra_config`, JSON_OBJECT()), + ''$.template_text'', `vp_template_text`, + ''$.duration_seconds'', `vp_duration_seconds`, + ''$.sample_rate'', `vp_sample_rate`, + ''$.channels'', `vp_channels`, + ''$.max_size_bytes'', `vp_max_size_bytes` + ) + WHERE `audio_scene` = ''voiceprint''', + 'SELECT 1' +); +PREPARE stmt FROM @sql; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +UPDATE `audio_model_config` +SET `request_timeout_seconds` = COALESCE( + NULLIF(`request_timeout_seconds`, 0), + CAST(JSON_UNQUOTE(JSON_EXTRACT(`extra_config`, '$.request_timeout_seconds')) AS UNSIGNED), + 300 +); + +SET SESSION group_concat_max_len = 8192; +SELECT GROUP_CONCAT(CONCAT('DROP COLUMN `', COLUMN_NAME, '`') ORDER BY ORDINAL_POSITION SEPARATOR ', ') +INTO @drop_columns_sql +FROM information_schema.COLUMNS +WHERE TABLE_SCHEMA = DATABASE() + AND TABLE_NAME = 'audio_model_config' + AND COLUMN_NAME IN ( + 'asr_model_name', + 'asr_vocabulary_id', + 'asr_speaker_count', + 'asr_language_hints', + 'asr_disfluency_removal_enabled', + 'asr_diarization_enabled', + 'vp_template_text', + 'vp_duration_seconds', + 'vp_sample_rate', + 'vp_channels', + 'vp_max_size_bytes' + ); + +SET @sql := IF( + @drop_columns_sql IS NULL OR @drop_columns_sql = '', + 'SELECT 1', + CONCAT('ALTER TABLE `audio_model_config` ', @drop_columns_sql) +); +PREPARE stmt FROM @sql; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +DROP TABLE IF EXISTS `ai_model_configs`; +DROP TABLE IF EXISTS `ai_model_config`; diff --git a/deploy.md b/deploy.md index aa63ea9..e4881b4 100644 --- a/deploy.md +++ b/deploy.md @@ -2,4 +2,9 @@ # 组件 + 数据库 mysql 5.7+ 10.100.51.51:3306 root | Unis@123 -+ 缓存 redis 6.2 10.100.51.51:6379 Unis@123 \ No newline at end of file ++ 缓存 redis 6.2 10.100.51.51:6379 Unis@123 + +# 升级前确认 ++ 后端运行环境需提供 `ffmpeg` 与 `ffprobe` ++ 本次数据库升级包含 `backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql` ++ 升级后 `audio_model_config` 将新增 `request_timeout_seconds`,并清理旧的 ASR/声纹冗余列 diff --git a/frontend/src/hooks/useAdminDashboardPage.js b/frontend/src/hooks/useAdminDashboardPage.js index 81e7525..12f2ea9 100644 --- a/frontend/src/hooks/useAdminDashboardPage.js +++ b/frontend/src/hooks/useAdminDashboardPage.js @@ -172,7 +172,7 @@ export default function useAdminDashboardPage() { const handleDownloadTranscript = async (meetingId) => { try { - const response = await apiClient.get(buildApiUrl(`/api/meetings/${meetingId}/transcript`)); + const response = await apiClient.get(buildApiUrl(API_ENDPOINTS.MEETINGS.TRANSCRIPT(meetingId))); if (response.code === '200') { const dataStr = JSON.stringify(response.data, null, 2); const blob = new Blob([dataStr], { type: 'application/json' }); @@ -190,6 +190,32 @@ export default function useAdminDashboardPage() { } }; + const handleDownloadAudio = async (meetingId, audioFilePath) => { + try { + const response = await fetch(buildApiUrl(`${API_ENDPOINTS.MEETINGS.AUDIO(meetingId)}/stream`), { + credentials: 'include', + }); + if (!response.ok) { + throw new Error(`audio download failed: ${response.status}`); + } + + const blob = await response.blob(); + const url = URL.createObjectURL(blob); + const link = document.createElement('a'); + const fileNameFromPath = audioFilePath?.split('/').pop(); + const fallbackExtension = fileNameFromPath?.includes('.') ? '' : '.mp3'; + link.href = url; + link.download = fileNameFromPath || `meeting_audio_${meetingId}${fallbackExtension}`; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + URL.revokeObjectURL(url); + } catch (error) { + console.error('下载音频失败:', error); + message.error('下载音频失败'); + } + }; + const closeMeetingModal = () => { setShowMeetingModal(false); setMeetingDetails(null); @@ -225,6 +251,7 @@ export default function useAdminDashboardPage() { handleKickUser, handleViewMeeting, handleDownloadTranscript, + handleDownloadAudio, closeMeetingModal, taskCompletionRate, }; diff --git a/frontend/src/pages/AdminDashboard.jsx b/frontend/src/pages/AdminDashboard.jsx index 8483921..daae36f 100644 --- a/frontend/src/pages/AdminDashboard.jsx +++ b/frontend/src/pages/AdminDashboard.jsx @@ -55,6 +55,16 @@ const STATUS_MAP = { }; const formatResourcePercent = (value) => `${Number(value || 0).toFixed(1)}%`; +const formatAudioDuration = (duration) => { + if (!duration && duration !== 0) { + return '无时长信息'; + } + + const totalSeconds = Math.max(0, Math.floor(Number(duration) || 0)); + const minutes = Math.floor(totalSeconds / 60); + const seconds = totalSeconds % 60; + return `${minutes}分${seconds}秒`; +}; const AdminDashboard = () => { const { @@ -80,6 +90,7 @@ const AdminDashboard = () => { handleKickUser, handleViewMeeting, handleDownloadTranscript, + handleDownloadAudio, closeMeetingModal, taskCompletionRate, } = useAdminDashboardPage(); @@ -167,6 +178,13 @@ const AdminDashboard = () => { ), }, + { + title: '关联账号', + dataIndex: 'creator_name', + key: 'creator_name', + width: 140, + render: (text) => text || '-', + }, { title: '状态', dataIndex: 'status', @@ -384,14 +402,25 @@ const AdminDashboard = () => { ) : meetingDetails ? ( {meetingDetails.title} + {meetingDetails.creator_account || '-'} {meetingDetails.meeting_time ? new Date(meetingDetails.meeting_time).toLocaleString() : '-'} {meetingDetails.prompt_name || '默认模版'} - {meetingDetails.audio_duration - ? `${Math.floor(meetingDetails.audio_duration / 60)}分${Math.floor(meetingDetails.audio_duration % 60)}秒` - : '无时长信息'} + + {formatAudioDuration(meetingDetails.audio_duration)} + {meetingDetails.audio_file_path ? ( + + ) : null} + } onClick={() => handleDownloadTranscript(meetingDetails.meeting_id)}> diff --git a/frontend/src/pages/admin/ModelManagement.jsx b/frontend/src/pages/admin/ModelManagement.jsx index 53022a8..487ca24 100644 --- a/frontend/src/pages/admin/ModelManagement.jsx +++ b/frontend/src/pages/admin/ModelManagement.jsx @@ -165,6 +165,7 @@ const ModelManagement = () => { provider: values.provider, endpoint_url: values.endpoint_url, api_key: values.api_key, + request_timeout_seconds: values.request_timeout_seconds, hot_word_group_id: values.hot_word_group_id || null, extra_config: extraConfig, description: values.description, @@ -233,6 +234,7 @@ const ModelManagement = () => { endpoint_url: 'https://dashscope.aliyuncs.com/api/v1/services/audio/asr/transcription', api_key: '', service_model_name: 'paraformer-v2', + request_timeout_seconds: 300, hot_word_group_id: undefined, asr_speaker_count: 10, asr_language_hints: 'zh,en', @@ -267,24 +269,25 @@ const ModelManagement = () => { const extraConfig = normalizeAudioExtraConfig(row); form.setFieldsValue({ ...row, + request_timeout_seconds: row.request_timeout_seconds ?? 300, hot_word_group_id: row.hot_word_group_id || undefined, - service_model_name: row.service_model_name || extraConfig.model || row.asr_model_name || row.model_name, - asr_speaker_count: extraConfig.speaker_count ?? row.asr_speaker_count, + service_model_name: row.service_model_name || extraConfig.model || row.model_name, + asr_speaker_count: extraConfig.speaker_count, asr_language_hints: Array.isArray(extraConfig.language_hints) ? extraConfig.language_hints.join(',') - : extraConfig.language_hints || row.asr_language_hints, - asr_disfluency_removal_enabled: extraConfig.disfluency_removal_enabled ?? Boolean(row.asr_disfluency_removal_enabled), - asr_diarization_enabled: extraConfig.diarization_enabled ?? Boolean(row.asr_diarization_enabled), + : extraConfig.language_hints, + asr_disfluency_removal_enabled: extraConfig.disfluency_removal_enabled ?? false, + asr_diarization_enabled: extraConfig.diarization_enabled ?? false, asr_timestamp_alignment_enabled: extraConfig.timestamp_alignment_enabled ?? false, asr_channel_id: Array.isArray(extraConfig.channel_id) ? extraConfig.channel_id.join(',') : extraConfig.channel_id, asr_special_word_filter: extraConfig.special_word_filter, asr_audio_event_detection_enabled: extraConfig.audio_event_detection_enabled ?? false, asr_phrase_id: extraConfig.phrase_id, - vp_template_text: extraConfig.template_text ?? row.vp_template_text, - vp_duration_seconds: extraConfig.duration_seconds ?? row.vp_duration_seconds, - vp_sample_rate: extraConfig.sample_rate ?? row.vp_sample_rate, - vp_channels: extraConfig.channels ?? row.vp_channels, - vp_max_size_bytes: extraConfig.max_size_bytes ?? row.vp_max_size_bytes, + vp_template_text: extraConfig.template_text, + vp_duration_seconds: extraConfig.duration_seconds, + vp_sample_rate: extraConfig.sample_rate, + vp_channels: extraConfig.channels, + vp_max_size_bytes: extraConfig.max_size_bytes, }); } setDrawerOpen(true); @@ -419,10 +422,10 @@ const ModelManagement = () => { key: 'core', render: (_, row) => { const extraConfig = normalizeAudioExtraConfig(row); - const serviceModelName = row.service_model_name || extraConfig.model || row.asr_model_name || row.model_name; + const serviceModelName = row.service_model_name || extraConfig.model || row.model_name; return row.audio_scene === 'voiceprint' - ? `模型=${serviceModelName || '-'} 时长=${extraConfig.duration_seconds || row.vp_duration_seconds || '-'}s 采样=${extraConfig.sample_rate || row.vp_sample_rate || '-'}` - : `模型=${serviceModelName || '-'} 热词组=${row.hot_word_group_name || '未关联'}`; + ? `模型=${serviceModelName || '-'} 时长=${extraConfig.duration_seconds || '-'}s 采样=${extraConfig.sample_rate || '-'}` + : `模型=${serviceModelName || '-'} 超时=${row.request_timeout_seconds || 300}s 热词组=${row.hot_word_group_name || '未关联'}`; }, }, { title: '状态', dataIndex: 'is_active', key: 'is_active', width: 90, render: (v) => }, @@ -628,6 +631,9 @@ const ModelManagement = () => { ) : ( <> + + +