codex/dev
mula.liu 2026-04-09 17:51:34 +08:00
parent 41f71e649d
commit 3fe28934cc
18 changed files with 590 additions and 260 deletions

View File

@ -52,6 +52,9 @@ vim .env # 配置七牛云、LLM密钥等
- ✅ 启动所有服务
- ✅ 等待健康检查
说明:
- 后端镜像现在依赖系统级 `ffmpeg/ffprobe` 做音频预处理,已在 `backend/Dockerfile` 中安装,无需宿主机额外安装。
### 方式二:手动启动
```bash
@ -119,6 +122,11 @@ HTTPS_PORT=443
# HTTP_PORT=80
```
### 音频预处理依赖
- Docker 部署:后端容器内已安装 `ffmpeg`
- 非 Docker 部署:请确保服务器可执行 `ffmpeg` 与 `ffprobe`
## 📦 数据目录
所有数据存储在 `./data/` 目录:

View File

@ -19,6 +19,7 @@ COPY requirements.txt .
RUN apt-get update && apt-get install -y \
gcc \
curl \
ffmpeg \
default-libmysqlclient-dev \
pkg-config \
&& pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements.txt \

View File

@ -44,19 +44,9 @@ class AudioModelUpsertRequest(BaseModel):
provider: str | None = None
endpoint_url: str | None = None
api_key: str | None = None
request_timeout_seconds: int = 300
extra_config: dict[str, Any] | None = None
asr_model_name: str | None = None
asr_vocabulary_id: str | None = None
hot_word_group_id: int | None = None
asr_speaker_count: int | None = None
asr_language_hints: str | None = None
asr_disfluency_removal_enabled: bool | None = None
asr_diarization_enabled: bool | None = None
vp_template_text: str | None = None
vp_duration_seconds: int | None = None
vp_sample_rate: int | None = None
vp_channels: int | None = None
vp_max_size_bytes: int | None = None
description: str | None = None
is_active: bool = True
is_default: bool = False

View File

@ -5,8 +5,8 @@ from app.core.auth import get_current_user
from app.core.response import create_api_response
from app.services.async_transcription_service import AsyncTranscriptionService
from app.services.async_meeting_service import async_meeting_service
from app.services.audio_preprocess_service import audio_preprocess_service
from app.services.audio_service import handle_audio_upload
from app.utils.audio_parser import get_audio_duration
from pydantic import BaseModel
from typing import Optional, List
from datetime import datetime, timedelta
@ -456,18 +456,30 @@ async def complete_upload(
}
)
# 6. 获取文件信息
# 6. 对合并后的音频执行统一预处理
full_path = BASE_DIR / file_path.lstrip('/')
file_size = full_path.stat().st_size
file_name = full_path.name
# 6.5 获取音频时长
audio_duration = 0
try:
audio_duration = get_audio_duration(str(full_path))
print(f"音频时长: {audio_duration}")
preprocess_result = audio_preprocess_service.preprocess(full_path)
processed_full_path = preprocess_result.file_path
file_size = preprocess_result.file_size
file_name = preprocess_result.file_name
audio_duration = preprocess_result.metadata.duration_seconds
file_path = f"/{processed_full_path.relative_to(BASE_DIR)}"
print(
f"流式上传音频预处理完成: source={full_path.name}, "
f"target={processed_full_path.name}, duration={audio_duration}s, "
f"applied={preprocess_result.applied}"
)
except Exception as e:
print(f"警告: 获取音频时长失败,但不影响后续流程: {e}")
if full_path.exists():
try:
os.remove(full_path)
except OSError:
pass
return create_api_response(
code="500",
message=f"音频预处理失败: {str(e)}"
)
# 7. 调用 audio_service 处理文件(数据库更新、启动转录和总结)
result = handle_audio_upload(
@ -484,8 +496,24 @@ async def complete_upload(
# 如果处理失败,返回错误
if not result["success"]:
cleanup_paths = [processed_full_path]
if processed_full_path != full_path:
cleanup_paths.append(full_path)
for cleanup_path in cleanup_paths:
if cleanup_path.exists():
try:
os.remove(cleanup_path)
except OSError:
pass
return result["response"]
if preprocess_result.applied and processed_full_path != full_path and full_path.exists():
try:
os.remove(full_path)
except OSError:
pass
# 8. 返回成功响应
transcription_task_id = result["transcription_task_id"]
message_suffix = ""

View File

@ -117,7 +117,6 @@ async def delete_group(id: int, current_user: dict = Depends(get_current_admin_u
"""
UPDATE audio_model_config
SET hot_word_group_id = NULL,
asr_vocabulary_id = NULL,
extra_config = JSON_REMOVE(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id')
WHERE hot_word_group_id = %s
""",
@ -190,15 +189,14 @@ async def sync_group(id: int, current_user: dict = Depends(get_current_admin_use
(vocab_id, id),
)
# 更新关联该组的所有 audio_model_config.asr_vocabulary_id
# 更新关联该组的所有 audio_model_config.extra_config.vocabulary_id
cursor.execute(
"""
UPDATE audio_model_config
SET asr_vocabulary_id = %s,
extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s)
SET extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s)
WHERE hot_word_group_id = %s
""",
(vocab_id, vocab_id, id),
(vocab_id, id),
)
conn.commit()

View File

@ -76,6 +76,7 @@ class Meeting(BaseModel):
description: Optional[str] = None
creator_id: int
creator_username: str
creator_account: Optional[str] = None
created_at: datetime.datetime
attendees: List[AttendeeInfo]
attendee_ids: Optional[List[int]] = None

View File

@ -92,24 +92,12 @@ def _merge_audio_extra_config(request, vocabulary_id: str | None = None) -> dict
extra_config = _parse_json_object(request.extra_config)
if request.audio_scene == "asr":
legacy_config = {
"model": request.asr_model_name,
"speaker_count": request.asr_speaker_count,
"language_hints": request.asr_language_hints,
"disfluency_removal_enabled": request.asr_disfluency_removal_enabled,
"diarization_enabled": request.asr_diarization_enabled,
}
if vocabulary_id:
extra_config["vocabulary_id"] = vocabulary_id
else:
legacy_config = {
"model": request.model_name,
"template_text": request.vp_template_text,
"duration_seconds": request.vp_duration_seconds,
"sample_rate": request.vp_sample_rate,
"channels": request.vp_channels,
"max_size_bytes": request.vp_max_size_bytes,
}
extra_config.pop("vocabulary_id", None)
merged = {**legacy_config, **extra_config}
merged = dict(extra_config)
language_hints = _normalize_string_list(merged.get("language_hints"))
if language_hints is not None:
@ -119,94 +107,20 @@ def _merge_audio_extra_config(request, vocabulary_id: str | None = None) -> dict
if channel_id is not None:
merged["channel_id"] = channel_id
resolved_vocabulary_id = vocabulary_id or merged.get("vocabulary_id") or request.asr_vocabulary_id
if request.audio_scene == "asr" and resolved_vocabulary_id:
merged["vocabulary_id"] = resolved_vocabulary_id
return _clean_extra_config(merged)
def _extract_legacy_audio_columns(audio_scene: str, extra_config: dict[str, Any]) -> dict[str, Any]:
extra_config = _parse_json_object(extra_config)
columns = {
"asr_model_name": None,
"asr_vocabulary_id": None,
"asr_speaker_count": None,
"asr_language_hints": None,
"asr_disfluency_removal_enabled": None,
"asr_diarization_enabled": None,
"vp_template_text": None,
"vp_duration_seconds": None,
"vp_sample_rate": None,
"vp_channels": None,
"vp_max_size_bytes": None,
}
if audio_scene == "asr":
language_hints = extra_config.get("language_hints")
if isinstance(language_hints, list):
language_hints = ",".join(str(item).strip() for item in language_hints if str(item).strip())
columns.update(
{
"asr_model_name": extra_config.get("model"),
"asr_vocabulary_id": extra_config.get("vocabulary_id"),
"asr_speaker_count": extra_config.get("speaker_count"),
"asr_language_hints": language_hints,
"asr_disfluency_removal_enabled": 1 if extra_config.get("disfluency_removal_enabled") is True else 0 if extra_config.get("disfluency_removal_enabled") is False else None,
"asr_diarization_enabled": 1 if extra_config.get("diarization_enabled") is True else 0 if extra_config.get("diarization_enabled") is False else None,
}
)
else:
columns.update(
{
"vp_template_text": extra_config.get("template_text"),
"vp_duration_seconds": extra_config.get("duration_seconds"),
"vp_sample_rate": extra_config.get("sample_rate"),
"vp_channels": extra_config.get("channels"),
"vp_max_size_bytes": extra_config.get("max_size_bytes"),
}
)
return columns
def _normalize_audio_row(row: dict[str, Any]) -> dict[str, Any]:
extra_config = _parse_json_object(row.get("extra_config"))
if row.get("audio_scene") == "asr":
if extra_config.get("model") is None and row.get("asr_model_name") is not None:
extra_config["model"] = row["asr_model_name"]
if extra_config.get("vocabulary_id") is None and row.get("asr_vocabulary_id") is not None:
extra_config["vocabulary_id"] = row["asr_vocabulary_id"]
if extra_config.get("speaker_count") is None and row.get("asr_speaker_count") is not None:
extra_config["speaker_count"] = row["asr_speaker_count"]
if extra_config.get("language_hints") is None and row.get("asr_language_hints"):
extra_config["language_hints"] = _normalize_string_list(row["asr_language_hints"])
if extra_config.get("disfluency_removal_enabled") is None and row.get("asr_disfluency_removal_enabled") is not None:
extra_config["disfluency_removal_enabled"] = bool(row["asr_disfluency_removal_enabled"])
if extra_config.get("diarization_enabled") is None and row.get("asr_diarization_enabled") is not None:
extra_config["diarization_enabled"] = bool(row["asr_diarization_enabled"])
else:
if extra_config.get("model") is None and row.get("model_name"):
extra_config["model"] = row["model_name"]
if extra_config.get("template_text") is None and row.get("vp_template_text") is not None:
extra_config["template_text"] = row["vp_template_text"]
if extra_config.get("duration_seconds") is None and row.get("vp_duration_seconds") is not None:
extra_config["duration_seconds"] = row["vp_duration_seconds"]
if extra_config.get("sample_rate") is None and row.get("vp_sample_rate") is not None:
extra_config["sample_rate"] = row["vp_sample_rate"]
if extra_config.get("channels") is None and row.get("vp_channels") is not None:
extra_config["channels"] = row["vp_channels"]
if extra_config.get("max_size_bytes") is None and row.get("vp_max_size_bytes") is not None:
extra_config["max_size_bytes"] = row["vp_max_size_bytes"]
row["extra_config"] = extra_config
row["service_model_name"] = extra_config.get("model")
row["request_timeout_seconds"] = int(row.get("request_timeout_seconds") or 300)
return row
def _resolve_hot_word_vocabulary_id(cursor, request) -> str | None:
vocabulary_id = request.asr_vocabulary_id
vocabulary_id = _parse_json_object(request.extra_config).get("vocabulary_id")
if request.hot_word_group_id:
cursor.execute("SELECT vocabulary_id FROM hot_word_group WHERE id = %s", (request.hot_word_group_id,))
group_row = cursor.fetchone()
@ -482,10 +396,8 @@ def list_audio_model_configs(scene: str = "all"):
cursor = conn.cursor(dictionary=True)
sql = """
SELECT a.config_id, a.model_code, a.model_name, a.audio_scene, a.provider, a.endpoint_url, a.api_key,
a.asr_model_name, a.asr_vocabulary_id, a.hot_word_group_id, a.asr_speaker_count, a.asr_language_hints,
a.asr_disfluency_removal_enabled, a.asr_diarization_enabled,
a.vp_template_text, a.vp_duration_seconds, a.vp_sample_rate, a.vp_channels, a.vp_max_size_bytes,
a.extra_config, a.description, a.is_active, a.is_default, a.created_at, a.updated_at,
a.request_timeout_seconds, a.hot_word_group_id, a.extra_config,
a.description, a.is_active, a.is_default, a.created_at, a.updated_at,
g.name AS hot_word_group_name, g.vocabulary_id AS hot_word_group_vocab_id
FROM audio_model_config a
LEFT JOIN hot_word_group g ON g.id = a.hot_word_group_id
@ -524,17 +436,13 @@ def create_audio_model_config(request):
asr_vocabulary_id = _resolve_hot_word_vocabulary_id(cursor, request)
extra_config = _merge_audio_extra_config(request, vocabulary_id=asr_vocabulary_id)
legacy_columns = _extract_legacy_audio_columns(request.audio_scene, extra_config)
cursor.execute(
"""
INSERT INTO audio_model_config
(model_code, model_name, audio_scene, provider, endpoint_url, api_key,
asr_model_name, asr_vocabulary_id, hot_word_group_id, asr_speaker_count, asr_language_hints,
asr_disfluency_removal_enabled, asr_diarization_enabled,
vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes,
extra_config, description, is_active, is_default)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
request_timeout_seconds, hot_word_group_id, extra_config, description, is_active, is_default)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
""",
(
request.model_code,
@ -543,18 +451,8 @@ def create_audio_model_config(request):
request.provider,
request.endpoint_url,
request.api_key,
legacy_columns["asr_model_name"],
legacy_columns["asr_vocabulary_id"],
request.request_timeout_seconds,
request.hot_word_group_id,
legacy_columns["asr_speaker_count"],
legacy_columns["asr_language_hints"],
legacy_columns["asr_disfluency_removal_enabled"],
legacy_columns["asr_diarization_enabled"],
legacy_columns["vp_template_text"],
legacy_columns["vp_duration_seconds"],
legacy_columns["vp_sample_rate"],
legacy_columns["vp_channels"],
legacy_columns["vp_max_size_bytes"],
json.dumps(extra_config, ensure_ascii=False),
request.description,
1 if request.is_active else 0,
@ -594,16 +492,13 @@ def update_audio_model_config(model_code: str, request):
asr_vocabulary_id = _resolve_hot_word_vocabulary_id(cursor, request)
extra_config = _merge_audio_extra_config(request, vocabulary_id=asr_vocabulary_id)
legacy_columns = _extract_legacy_audio_columns(request.audio_scene, extra_config)
cursor.execute(
"""
UPDATE audio_model_config
SET model_code = %s, model_name = %s, audio_scene = %s, provider = %s, endpoint_url = %s, api_key = %s,
asr_model_name = %s, asr_vocabulary_id = %s, hot_word_group_id = %s, asr_speaker_count = %s, asr_language_hints = %s,
asr_disfluency_removal_enabled = %s, asr_diarization_enabled = %s,
vp_template_text = %s, vp_duration_seconds = %s, vp_sample_rate = %s, vp_channels = %s, vp_max_size_bytes = %s,
extra_config = %s, description = %s, is_active = %s, is_default = %s
request_timeout_seconds = %s, hot_word_group_id = %s, extra_config = %s,
description = %s, is_active = %s, is_default = %s
WHERE model_code = %s
""",
(
@ -613,18 +508,8 @@ def update_audio_model_config(model_code: str, request):
request.provider,
request.endpoint_url,
request.api_key,
legacy_columns["asr_model_name"],
legacy_columns["asr_vocabulary_id"],
request.request_timeout_seconds,
request.hot_word_group_id,
legacy_columns["asr_speaker_count"],
legacy_columns["asr_language_hints"],
legacy_columns["asr_disfluency_removal_enabled"],
legacy_columns["asr_diarization_enabled"],
legacy_columns["vp_template_text"],
legacy_columns["vp_duration_seconds"],
legacy_columns["vp_sample_rate"],
legacy_columns["vp_channels"],
legacy_columns["vp_max_size_bytes"],
json.dumps(extra_config, ensure_ascii=False),
request.description,
1 if request.is_active else 0,
@ -693,6 +578,7 @@ def test_audio_model_config(request):
"api_key": request.api_key,
"audio_scene": request.audio_scene,
"hot_word_group_id": request.hot_word_group_id,
"request_timeout_seconds": request.request_timeout_seconds,
**extra_config,
}
result = transcription_service.test_asr_model(runtime_config, test_file_url=request.test_file_url)

View File

@ -14,6 +14,19 @@ from app.core.database import get_db_connection
from app.services.system_config_service import SystemConfigService
class _DefaultTimeoutSession(requests.Session):
    """A requests.Session that falls back to a default timeout.

    Any request issued without an explicit ``timeout`` keyword gets
    ``default_timeout`` applied, so downstream calls cannot accidentally
    hang forever on a slow provider.
    """

    def __init__(self, default_timeout: Optional[int] = None):
        super().__init__()
        # Seconds applied when the caller omits ``timeout``; falsy values
        # (None/0) disable the injection and keep requests' default behavior.
        self.default_timeout = default_timeout

    def request(self, method, url, **kwargs):
        caller_set_timeout = "timeout" in kwargs
        if self.default_timeout and not caller_set_timeout:
            kwargs["timeout"] = self.default_timeout
        return super().request(method, url, **kwargs)
class AsyncTranscriptionService:
"""异步转录服务类"""
@ -23,8 +36,8 @@ class AsyncTranscriptionService:
self.base_url = APP_CONFIG['base_url']
@staticmethod
def _create_requests_session() -> requests.Session:
session = requests.Session()
def _create_requests_session(default_timeout: Optional[int] = None) -> requests.Session:
session = _DefaultTimeoutSession(default_timeout=default_timeout)
session.trust_env = os.getenv("IMEETING_USE_SYSTEM_PROXY", "").lower() in {"1", "true", "yes", "on"}
return session
@ -57,6 +70,35 @@ class AsyncTranscriptionService:
request_options["base_address"] = base_address
return request_options
@staticmethod
def _resolve_request_timeout_seconds(audio_config: Optional[Dict[str, Any]] = None) -> int:
    """Read ``request_timeout_seconds`` from the audio model config.

    Falls back to 300 seconds when the value is missing or not coercible
    to int, and never returns less than 10 seconds.
    """
    config = audio_config or {}
    raw_value = config.get("request_timeout_seconds")
    try:
        resolved = int(raw_value)
    except (TypeError, ValueError):
        # Missing key (None) or garbage value: use the 5-minute default.
        resolved = 300
    return resolved if resolved >= 10 else 10
def _dashscope_async_call(self, request_options: Dict[str, Any], call_params: Dict[str, Any], timeout_seconds: int):
    """Submit an async DashScope transcription call through a session whose
    default request timeout is ``timeout_seconds``.

    NOTE(review): the TypeError fallback assumes older dashscope SDK
    versions reject the ``session`` keyword — confirm against the pinned
    SDK version; a TypeError from a kwarg collision between
    ``request_options`` and ``call_params`` would be masked the same way.
    """
    session = self._create_requests_session(timeout_seconds)
    try:
        try:
            return Transcription.async_call(session=session, **request_options, **call_params)
        except TypeError:
            # SDK does not accept an injected session: retry without it.
            return Transcription.async_call(**request_options, **call_params)
    finally:
        # Always release the session's pooled connections, even on failure.
        session.close()
def _dashscope_fetch(self, paraformer_task_id: str, request_options: Dict[str, Any], timeout_seconds: int):
    """Fetch the status/result of a DashScope transcription task through a
    session whose default request timeout is ``timeout_seconds``.

    NOTE(review): mirrors ``_dashscope_async_call`` — the TypeError
    fallback is for SDK versions that reject the ``session`` keyword.
    """
    session = self._create_requests_session(timeout_seconds)
    try:
        try:
            return Transcription.fetch(task=paraformer_task_id, session=session, **request_options)
        except TypeError:
            # SDK does not accept an injected session: retry without it.
            return Transcription.fetch(task=paraformer_task_id, **request_options)
    finally:
        # Always release the session's pooled connections, even on failure.
        session.close()
@staticmethod
def _build_dashscope_call_params(audio_config: Dict[str, Any], file_url: str) -> Dict[str, Any]:
model_name = audio_config.get("model") or "paraformer-v2"
@ -93,13 +135,14 @@ class AsyncTranscriptionService:
raise Exception(f"当前仅支持 DashScope 音频识别测试,暂不支持供应商: {provider}")
request_options = self._build_dashscope_request_options(audio_config)
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
dashscope.api_key = request_options["api_key"]
target_file_url = (
test_file_url
or "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav"
)
call_params = self._build_dashscope_call_params(audio_config, target_file_url)
response = Transcription.async_call(**request_options, **call_params)
response = self._dashscope_async_call(request_options, call_params, timeout_seconds)
if response.status_code != HTTPStatus.OK:
raise Exception(response.message or "音频模型测试失败")
@ -154,6 +197,7 @@ class AsyncTranscriptionService:
raise Exception(f"当前仅支持 DashScope 音频识别,暂不支持供应商: {provider}")
request_options = self._build_dashscope_request_options(audio_config)
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
dashscope.api_key = request_options["api_key"]
call_params = self._build_dashscope_call_params(audio_config, file_url)
@ -164,7 +208,7 @@ class AsyncTranscriptionService:
)
# 3. 调用Paraformer异步API
task_response = Transcription.async_call(**request_options, **call_params)
task_response = self._dashscope_async_call(request_options, call_params, timeout_seconds)
if task_response.status_code != HTTPStatus.OK:
print(f"Failed to start transcription: {task_response.status_code}, {task_response.message}")
@ -238,11 +282,11 @@ class AsyncTranscriptionService:
# 2. 查询外部API获取状态
try:
request_options = self._build_dashscope_request_options(
SystemConfigService.get_active_audio_model_config("asr")
)
audio_config = SystemConfigService.get_active_audio_model_config("asr")
request_options = self._build_dashscope_request_options(audio_config)
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
dashscope.api_key = request_options["api_key"]
paraformer_response = Transcription.fetch(task=paraformer_task_id, **request_options)
paraformer_response = self._dashscope_fetch(paraformer_task_id, request_options, timeout_seconds)
if paraformer_response.status_code != HTTPStatus.OK:
raise Exception(f"Failed to fetch task status from provider: {paraformer_response.message}")
@ -560,9 +604,11 @@ class AsyncTranscriptionService:
transcription_url = paraformer_output['results'][0]['transcription_url']
print(f"Fetching transcription from URL: {transcription_url}")
session = self._create_requests_session()
audio_config = SystemConfigService.get_active_audio_model_config("asr")
timeout_seconds = self._resolve_request_timeout_seconds(audio_config)
session = self._create_requests_session(timeout_seconds)
try:
response = session.get(transcription_url)
response = session.get(transcription_url, timeout=timeout_seconds)
finally:
session.close()
response.raise_for_status()

View File

@ -0,0 +1,188 @@
"""
音频预处理服务
使用 ffprobe/ffmpeg 对上传音频做统一探测和规范化降低长会议音频的格式兼容风险
当前阶段只做单文件预处理不做拆片
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import json
import shutil
import subprocess
from app.utils.audio_parser import get_audio_duration
@dataclass
class AudioMetadata:
    """Audio metadata probed from a file.

    ``duration_seconds`` is always populated (0 when unknown); the other
    fields are only filled in when ffprobe supplied them.
    """

    duration_seconds: int = 0  # whole seconds; 0 when it could not be determined
    sample_rate: Optional[int] = None  # sample rate as reported by ffprobe
    channels: Optional[int] = None  # channel count as reported by ffprobe
    codec_name: Optional[str] = None  # audio codec name, e.g. "aac"
    format_name: Optional[str] = None  # container format name from ffprobe
    bit_rate: Optional[int] = None  # stream (or container) bit rate
@dataclass
class AudioPreprocessResult:
    """Result of one preprocessing pass over an uploaded audio file."""

    file_path: Path  # file callers should use downstream (normalized or original)
    file_name: str  # basename of file_path
    file_size: int  # size of file_path in bytes
    metadata: AudioMetadata  # probed metadata of the returned file
    applied: bool = False  # True when ffmpeg actually transcoded the file
    output_format: Optional[str] = None  # extension without the dot, e.g. "m4a"
class AudioPreprocessService:
    """ffmpeg-based audio preprocessing service.

    Probes uploaded audio with ffprobe and normalizes it to a single
    canonical format to reduce format-compatibility risk for long meeting
    recordings. Degrades gracefully: without ffmpeg/ffprobe on PATH the
    original file is kept and only the duration is probed.
    """

    # Normalization targets: mono 16 kHz AAC in an .m4a container at 64 kbps.
    TARGET_EXTENSION = ".m4a"
    TARGET_SAMPLE_RATE = 16000
    TARGET_CHANNELS = 1
    TARGET_BITRATE = "64k"

    def __init__(self):
        # Resolve tool locations from PATH once at construction;
        # None when the corresponding tool is not installed.
        self.ffmpeg_path = shutil.which("ffmpeg")
        self.ffprobe_path = shutil.which("ffprobe")

    def probe_audio(self, file_path: str | Path) -> AudioMetadata:
        """Probe audio metadata.

        Tries ffprobe first; when ffprobe is missing or fails, falls back
        to the duration-only parser (all other fields stay None).

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"音频文件不存在: {path}")
        if self.ffprobe_path:
            metadata = self._probe_with_ffprobe(path)
            if metadata:
                return metadata
        # Fallback: only the duration can be recovered without ffprobe.
        return AudioMetadata(duration_seconds=get_audio_duration(str(path)))

    def preprocess(self, file_path: str | Path) -> AudioPreprocessResult:
        """Normalize an audio file to the canonical upload format.

        Current policy (single file, no chunking):
          1. drop any video stream (``-vn``)
          2. downmix to mono
          3. resample to 16 kHz
          4. re-encode as AAC in an .m4a container

        When ffmpeg is unavailable the source file is returned untouched
        with ``applied=False``.

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
            RuntimeError: if ffmpeg exits with a non-zero status.
        """
        source_path = Path(file_path)
        if not source_path.exists():
            raise FileNotFoundError(f"音频文件不存在: {source_path}")
        if not self.ffmpeg_path:
            # Best effort without ffmpeg: probe only, keep the original file.
            metadata = self.probe_audio(source_path)
            return AudioPreprocessResult(
                file_path=source_path,
                file_name=source_path.name,
                file_size=source_path.stat().st_size,
                metadata=metadata,
                applied=False,
                output_format=source_path.suffix.lower().lstrip(".") or None,
            )

        output_path = source_path.with_name(f"{source_path.stem}_normalized{self.TARGET_EXTENSION}")
        # Encode into a temp name first so a failed run never leaves a
        # partial file at the final path; promoted via Path.replace below.
        temp_output_path = output_path.with_name(f"{output_path.stem}.tmp{output_path.suffix}")
        command = [
            self.ffmpeg_path,
            "-y",
            "-i",
            str(source_path),
            "-vn",
            "-ac",
            str(self.TARGET_CHANNELS),
            "-ar",
            str(self.TARGET_SAMPLE_RATE),
            "-c:a",
            "aac",
            "-b:a",
            self.TARGET_BITRATE,
            "-movflags",
            "+faststart",
            str(temp_output_path),
        ]
        try:
            completed = subprocess.run(
                command,
                check=False,
                capture_output=True,
                text=True,
            )
            if completed.returncode != 0:
                stderr = (completed.stderr or "").strip()
                raise RuntimeError(stderr or "ffmpeg 预处理失败")
            temp_output_path.replace(output_path)
            metadata = self.probe_audio(output_path)
            return AudioPreprocessResult(
                file_path=output_path,
                file_name=output_path.name,
                file_size=output_path.stat().st_size,
                metadata=metadata,
                applied=True,
                output_format=output_path.suffix.lower().lstrip("."),
            )
        finally:
            # After a successful replace() the temp file is gone; this only
            # cleans up leftovers from a failed run.
            if temp_output_path.exists():
                temp_output_path.unlink()

    def _probe_with_ffprobe(self, file_path: Path) -> Optional[AudioMetadata]:
        """Run ffprobe and parse its JSON output; returns None on any failure
        so the caller can fall back to the duration-only parser."""
        command = [
            self.ffprobe_path,
            "-v",
            "error",
            "-print_format",
            "json",
            "-show_streams",
            "-show_format",
            str(file_path),
        ]
        try:
            completed = subprocess.run(
                command,
                check=False,
                capture_output=True,
                text=True,
            )
            if completed.returncode != 0 or not completed.stdout:
                return None
            payload = json.loads(completed.stdout)
            streams = payload.get("streams") or []
            # First audio stream, or an empty dict when there is none.
            audio_stream = next((stream for stream in streams if stream.get("codec_type") == "audio"), {})
            format_info = payload.get("format") or {}
            # Prefer per-stream values; fall back to container-level ones.
            duration_value = audio_stream.get("duration") or format_info.get("duration")
            duration_seconds = int(float(duration_value)) if duration_value else 0
            sample_rate_value = audio_stream.get("sample_rate")
            channels_value = audio_stream.get("channels")
            bit_rate_value = audio_stream.get("bit_rate") or format_info.get("bit_rate")
            return AudioMetadata(
                duration_seconds=duration_seconds,
                sample_rate=int(sample_rate_value) if sample_rate_value else None,
                channels=int(channels_value) if channels_value else None,
                codec_name=audio_stream.get("codec_name"),
                format_name=format_info.get("format_name"),
                bit_rate=int(bit_rate_value) if bit_rate_value else None,
            )
        except Exception:
            # Deliberate best-effort: any parse/run error means "no metadata".
            return None
# Module-level singleton imported by the audio upload endpoints.
audio_preprocess_service = AudioPreprocessService()

View File

@ -8,8 +8,8 @@ from app.services.llm_service import LLMService
from app.services.async_transcription_service import AsyncTranscriptionService
from app.services.async_meeting_service import async_meeting_service
from app.services.audio_service import handle_audio_upload
from app.services.audio_preprocess_service import audio_preprocess_service
from app.services.system_config_service import SystemConfigService
from app.utils.audio_parser import get_audio_duration
from app.core.auth import get_current_user, get_optional_current_user
from app.core.response import create_api_response
from typing import Any, Dict, List, Optional
@ -479,7 +479,7 @@ def get_meeting_details(meeting_id: int, current_user: dict = Depends(get_curren
cursor = connection.cursor(dictionary=True)
query = '''
SELECT m.meeting_id, m.title, m.meeting_time, m.summary, m.created_at, m.tags,
m.user_id as creator_id, u.caption as creator_username, m.prompt_id,
m.user_id as creator_id, u.caption as creator_username, u.username as creator_account, m.prompt_id,
af.file_path as audio_file_path, af.duration as audio_duration,
p.name as prompt_name, m.access_password
FROM meetings m
@ -505,7 +505,8 @@ def get_meeting_details(meeting_id: int, current_user: dict = Depends(get_curren
meeting_id=meeting['meeting_id'], title=meeting['title'], meeting_time=meeting['meeting_time'],
summary=meeting['summary'], created_at=meeting['created_at'], attendees=attendees,
attendee_ids=[row['user_id'] for row in attendees_data],
creator_id=meeting['creator_id'], creator_username=meeting['creator_username'], tags=tags,
creator_id=meeting['creator_id'], creator_username=meeting['creator_username'],
creator_account=meeting.get('creator_account'), tags=tags,
prompt_id=meeting.get('prompt_id'),
prompt_name=meeting.get('prompt_name'),
overall_status=overall_status.get('overall_status'),
@ -725,7 +726,6 @@ async def upload_audio(
meeting_dir.mkdir(exist_ok=True)
unique_filename = f"{uuid.uuid4()}{file_extension}"
absolute_path = meeting_dir / unique_filename
relative_path = absolute_path.relative_to(BASE_DIR)
try:
with open(absolute_path, "wb") as buffer:
@ -733,17 +733,28 @@ async def upload_audio(
except Exception as e:
return create_api_response(code="500", message=f"保存文件失败: {str(e)}")
# 3.5 获取音频时长
audio_duration = 0
# 3.5 统一做音频预处理
try:
audio_duration = get_audio_duration(str(absolute_path))
print(f"音频时长: {audio_duration}")
preprocess_result = audio_preprocess_service.preprocess(absolute_path)
processed_absolute_path = preprocess_result.file_path
audio_duration = preprocess_result.metadata.duration_seconds
print(
f"音频预处理完成: source={absolute_path.name}, "
f"target={processed_absolute_path.name}, duration={audio_duration}s, "
f"applied={preprocess_result.applied}"
)
except Exception as e:
print(f"警告: 获取音频时长失败,但不影响后续流程: {e}")
if absolute_path.exists():
try:
os.remove(absolute_path)
except OSError:
pass
return create_api_response(code="500", message=f"音频预处理失败: {str(e)}")
file_path = '/' + str(relative_path)
file_name = audio_file.filename
file_size = audio_file.size
processed_relative_path = processed_absolute_path.relative_to(BASE_DIR)
file_path = '/' + str(processed_relative_path)
file_name = preprocess_result.file_name
file_size = preprocess_result.file_size
# 4. 调用 audio_service 处理文件(权限检查、数据库更新、启动转录)
result = handle_audio_upload(
@ -761,14 +772,26 @@ async def upload_audio(
# 如果不成功,删除已保存的文件并返回错误
if not result["success"]:
if absolute_path.exists():
try:
os.remove(absolute_path)
print(f"Deleted file due to processing error: {absolute_path}")
except Exception as e:
print(f"Warning: Failed to delete file {absolute_path}: {e}")
cleanup_paths = [processed_absolute_path]
if processed_absolute_path != absolute_path:
cleanup_paths.append(absolute_path)
for cleanup_path in cleanup_paths:
if cleanup_path.exists():
try:
os.remove(cleanup_path)
print(f"Deleted file due to processing error: {cleanup_path}")
except Exception as e:
print(f"Warning: Failed to delete file {cleanup_path}: {e}")
return result["response"]
if preprocess_result.applied and processed_absolute_path != absolute_path and absolute_path.exists():
try:
os.remove(absolute_path)
print(f"Deleted original uploaded audio after preprocessing: {absolute_path}")
except Exception as e:
print(f"Warning: Failed to delete original uploaded audio {absolute_path}: {e}")
# 5. 返回成功响应
transcription_task_id = result["transcription_task_id"]
message_suffix = ""

View File

@ -120,33 +120,8 @@ class SystemConfigService:
cfg["audio_scene"] = audio_row["audio_scene"]
if audio_row.get("hot_word_group_id") is not None:
cfg["hot_word_group_id"] = audio_row["hot_word_group_id"]
if audio_row.get("audio_scene") == "asr":
if extra_config.get("model") is None and audio_row.get("asr_model_name") is not None:
extra_config["model"] = audio_row["asr_model_name"]
if extra_config.get("vocabulary_id") is None and audio_row.get("asr_vocabulary_id") is not None:
extra_config["vocabulary_id"] = audio_row["asr_vocabulary_id"]
if extra_config.get("speaker_count") is None and audio_row.get("asr_speaker_count") is not None:
extra_config["speaker_count"] = audio_row["asr_speaker_count"]
if extra_config.get("language_hints") is None and audio_row.get("asr_language_hints"):
extra_config["language_hints"] = audio_row["asr_language_hints"]
if extra_config.get("disfluency_removal_enabled") is None and audio_row.get("asr_disfluency_removal_enabled") is not None:
extra_config["disfluency_removal_enabled"] = bool(audio_row["asr_disfluency_removal_enabled"])
if extra_config.get("diarization_enabled") is None and audio_row.get("asr_diarization_enabled") is not None:
extra_config["diarization_enabled"] = bool(audio_row["asr_diarization_enabled"])
else:
if extra_config.get("model") is None and audio_row.get("model_name"):
extra_config["model"] = audio_row["model_name"]
if extra_config.get("template_text") is None and audio_row.get("vp_template_text") is not None:
extra_config["template_text"] = audio_row["vp_template_text"]
if extra_config.get("duration_seconds") is None and audio_row.get("vp_duration_seconds") is not None:
extra_config["duration_seconds"] = audio_row["vp_duration_seconds"]
if extra_config.get("sample_rate") is None and audio_row.get("vp_sample_rate") is not None:
extra_config["sample_rate"] = audio_row["vp_sample_rate"]
if extra_config.get("channels") is None and audio_row.get("vp_channels") is not None:
extra_config["channels"] = audio_row["vp_channels"]
if extra_config.get("max_size_bytes") is None and audio_row.get("vp_max_size_bytes") is not None:
extra_config["max_size_bytes"] = audio_row["vp_max_size_bytes"]
if audio_row.get("request_timeout_seconds") is not None:
cfg["request_timeout_seconds"] = int(audio_row["request_timeout_seconds"])
language_hints = cls._normalize_string_list(extra_config.get("language_hints"))
if language_hints is not None:
@ -162,11 +137,8 @@ class SystemConfigService:
cursor = conn.cursor(dictionary=True)
cursor.execute(
"""
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, hot_word_group_id,
asr_model_name, asr_vocabulary_id, asr_speaker_count, asr_language_hints,
asr_disfluency_removal_enabled, asr_diarization_enabled,
vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes,
extra_config
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key,
request_timeout_seconds, hot_word_group_id, extra_config
FROM audio_model_config
WHERE audio_scene = %s AND is_active = 1
ORDER BY is_default DESC, updated_at DESC, config_id ASC
@ -260,11 +232,8 @@ class SystemConfigService:
cursor.execute(
"""
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key, hot_word_group_id,
asr_model_name, asr_vocabulary_id, asr_speaker_count, asr_language_hints,
asr_disfluency_removal_enabled, asr_diarization_enabled,
vp_template_text, vp_duration_seconds, vp_sample_rate, vp_channels, vp_max_size_bytes,
extra_config
SELECT model_code, model_name, audio_scene, provider, endpoint_url, api_key,
request_timeout_seconds, hot_word_group_id, extra_config
FROM audio_model_config
WHERE model_code = %s AND is_active = 1
ORDER BY is_default DESC, config_id ASC
@ -427,28 +396,30 @@ class SystemConfigService:
cursor.execute(
"""
INSERT INTO audio_model_config
(model_code, model_name, audio_scene, provider, asr_model_name, asr_vocabulary_id, asr_speaker_count,
asr_language_hints, asr_disfluency_removal_enabled, asr_diarization_enabled, description, is_active, is_default)
(model_code, model_name, audio_scene, provider, request_timeout_seconds, extra_config, description, is_active, is_default)
VALUES (
'audio_model',
'音频识别模型',
'asr',
'dashscope',
'paraformer-v2',
%s,
10,
'zh,en',
1,
1,
300,
JSON_OBJECT(
'model', 'paraformer-v2',
'vocabulary_id', %s,
'speaker_count', 10,
'language_hints', JSON_ARRAY('zh', 'en'),
'disfluency_removal_enabled', TRUE,
'diarization_enabled', TRUE
),
'语音识别模型配置',
1,
1
)
ON DUPLICATE KEY UPDATE
asr_vocabulary_id = VALUES(asr_vocabulary_id),
extra_config = JSON_SET(COALESCE(extra_config, JSON_OBJECT()), '$.vocabulary_id', %s),
is_active = 1
""",
(str(value),),
(str(value), str(value)),
)
conn.commit()
cursor.close()
@ -626,7 +597,6 @@ class SystemConfigService:
audio_cfg = cls.get_active_audio_model_config("asr")
if audio_cfg.get("vocabulary_id"):
return audio_cfg["vocabulary_id"]
# 回退:直接读 audio_model_config.asr_vocabulary_id
audio_vocab = cls.get_config_attribute('audio_model', 'vocabulary_id')
if audio_vocab:
return audio_vocab

View File

@ -3,15 +3,16 @@
用于解析音频文件的元数据信息如时长采样率编码格式等
"""
from tinytag import TinyTag
import json
import shutil
import subprocess
def get_audio_duration(file_path: str) -> int:
    """
    Get the duration of an audio file, in whole seconds.

    Probes the file with the system ``ffprobe`` binary and parses its JSON
    ``format`` section. Supported formats are whatever the installed ffprobe
    supports (MP3, WAV, OGG, FLAC, ...).

    Args:
        file_path: Full path to the audio file.

    Returns:
        Duration in seconds as an int; 0 when ffprobe is missing, the file
        cannot be probed, or its output lacks a duration.
    """
    ffprobe_path = shutil.which("ffprobe")
    if not ffprobe_path:
        # ffprobe is not installed; callers treat 0 as "no duration info".
        return 0
    try:
        completed = subprocess.run(
            [
                ffprobe_path,
                "-v",
                "error",
                "-print_format",
                "json",
                "-show_format",
                str(file_path),
            ],
            check=False,  # non-zero exit is handled below, not raised
            capture_output=True,
            text=True,
        )
        if completed.returncode == 0 and completed.stdout:
            payload = json.loads(completed.stdout)
            # "format.duration" is a decimal string, e.g. "123.456".
            duration_value = (payload.get("format") or {}).get("duration")
            if duration_value:
                return int(float(duration_value))
    except Exception as e:
        # Best-effort helper: log and fall through to the 0 sentinel.
        print(f"ffprobe 获取音频时长失败 ({file_path}): {e}")
    return 0

View File

@ -22,6 +22,4 @@ psutil
# APK Parsing
pyaxmlparser
# Audio Metadata
tinytag
python-dotenv

View File

@ -0,0 +1,105 @@
-- Migration: fold legacy ASR/voiceprint columns of audio_model_config into the
-- extra_config JSON column, add request_timeout_seconds, and drop obsolete
-- AI-model tables. Uses information_schema checks + prepared statements so the
-- script is idempotent on MySQL 5.7 (no ADD COLUMN IF NOT EXISTS there).

-- Step 1: detect whether request_timeout_seconds already exists.
SET @request_timeout_exists := (
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME = 'request_timeout_seconds'
);
-- Add the column only when missing; otherwise run a no-op SELECT.
SET @sql := IF(
@request_timeout_exists = 0,
'ALTER TABLE `audio_model_config` ADD COLUMN `request_timeout_seconds` int(11) NOT NULL DEFAULT 300 COMMENT ''音频转录请求超时(秒)'' AFTER `api_key`',
'SELECT 1'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 2: if the legacy ASR columns still exist, copy their values into
-- extra_config before they are dropped below.
SET @has_asr_legacy := (
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME = 'asr_model_name'
);
SET @sql := IF(
@has_asr_legacy > 0,
'UPDATE `audio_model_config`
SET `extra_config` = JSON_SET(
COALESCE(`extra_config`, JSON_OBJECT()),
''$.model'', `asr_model_name`,
''$.vocabulary_id'', `asr_vocabulary_id`,
''$.speaker_count'', `asr_speaker_count`,
''$.language_hints'', `asr_language_hints`,
''$.disfluency_removal_enabled'', `asr_disfluency_removal_enabled`,
''$.diarization_enabled'', `asr_diarization_enabled`
)
WHERE `audio_scene` = ''asr''',
'SELECT 1'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 3: same migration for the legacy voiceprint (vp_*) columns.
SET @has_voiceprint_legacy := (
SELECT COUNT(*)
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME = 'vp_template_text'
);
SET @sql := IF(
@has_voiceprint_legacy > 0,
'UPDATE `audio_model_config`
SET `extra_config` = JSON_SET(
COALESCE(`extra_config`, JSON_OBJECT()),
''$.template_text'', `vp_template_text`,
''$.duration_seconds'', `vp_duration_seconds`,
''$.sample_rate'', `vp_sample_rate`,
''$.channels'', `vp_channels`,
''$.max_size_bytes'', `vp_max_size_bytes`
)
WHERE `audio_scene` = ''voiceprint''',
'SELECT 1'
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 4: backfill request_timeout_seconds — prefer a non-zero column value,
-- then any timeout stored in extra_config, else the 300s default.
UPDATE `audio_model_config`
SET `request_timeout_seconds` = COALESCE(
NULLIF(`request_timeout_seconds`, 0),
CAST(JSON_UNQUOTE(JSON_EXTRACT(`extra_config`, '$.request_timeout_seconds')) AS UNSIGNED),
300
);
-- Step 5: build one dynamic ALTER that drops every remaining legacy column.
-- Raise group_concat_max_len so the generated DDL is not truncated.
SET SESSION group_concat_max_len = 8192;
SELECT GROUP_CONCAT(CONCAT('DROP COLUMN `', COLUMN_NAME, '`') ORDER BY ORDINAL_POSITION SEPARATOR ', ')
INTO @drop_columns_sql
FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA = DATABASE()
AND TABLE_NAME = 'audio_model_config'
AND COLUMN_NAME IN (
'asr_model_name',
'asr_vocabulary_id',
'asr_speaker_count',
'asr_language_hints',
'asr_disfluency_removal_enabled',
'asr_diarization_enabled',
'vp_template_text',
'vp_duration_seconds',
'vp_sample_rate',
'vp_channels',
'vp_max_size_bytes'
);
-- No-op when the legacy columns are already gone (re-run safety).
SET @sql := IF(
@drop_columns_sql IS NULL OR @drop_columns_sql = '',
'SELECT 1',
CONCAT('ALTER TABLE `audio_model_config` ', @drop_columns_sql)
);
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
-- Step 6: remove superseded AI-model tables.
DROP TABLE IF EXISTS `ai_model_configs`;
DROP TABLE IF EXISTS `ai_model_config`;

View File

@ -3,3 +3,8 @@
# 组件
+ 数据库 mysql 5.7+ 10.100.51.51:3306 root | Unis@123
+ 缓存 redis 6.2 10.100.51.51:6379 Unis@123
# 升级前确认
+ 后端运行环境需提供 `ffmpeg``ffprobe`
+ 本次数据库升级包含 `backend/sql/migrations/cleanup_audio_model_config_and_drop_legacy_ai_tables.sql`
+ 升级后 `audio_model_config` 将新增 `request_timeout_seconds`,并清理旧的 ASR/声纹冗余列

View File

@ -172,7 +172,7 @@ export default function useAdminDashboardPage() {
const handleDownloadTranscript = async (meetingId) => {
try {
const response = await apiClient.get(buildApiUrl(`/api/meetings/${meetingId}/transcript`));
const response = await apiClient.get(buildApiUrl(API_ENDPOINTS.MEETINGS.TRANSCRIPT(meetingId)));
if (response.code === '200') {
const dataStr = JSON.stringify(response.data, null, 2);
const blob = new Blob([dataStr], { type: 'application/json' });
@ -190,6 +190,32 @@ export default function useAdminDashboardPage() {
}
};
// Download a meeting's audio via the streaming endpoint and trigger a
// browser "save as" by clicking a temporary object-URL anchor.
// audioFilePath is only used to derive the download file name.
const handleDownloadAudio = async (meetingId, audioFilePath) => {
try {
// credentials: 'include' sends the session cookie to the backend.
const response = await fetch(buildApiUrl(`${API_ENDPOINTS.MEETINGS.AUDIO(meetingId)}/stream`), {
credentials: 'include',
});
if (!response.ok) {
throw new Error(`audio download failed: ${response.status}`);
}
// Buffer the full stream into a Blob, then expose it as an object URL.
const blob = await response.blob();
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
// Use the last path segment as the file name when available.
const fileNameFromPath = audioFilePath?.split('/').pop();
// Fallback name gets ".mp3" only when no extension can be inferred.
const fallbackExtension = fileNameFromPath?.includes('.') ? '' : '.mp3';
link.href = url;
link.download = fileNameFromPath || `meeting_audio_${meetingId}${fallbackExtension}`;
// Programmatic click requires the anchor to be in the DOM on some browsers.
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
// Release the Blob memory held by the object URL.
URL.revokeObjectURL(url);
} catch (error) {
console.error('下载音频失败:', error);
message.error('下载音频失败');
}
};
const closeMeetingModal = () => {
setShowMeetingModal(false);
setMeetingDetails(null);
@ -225,6 +251,7 @@ export default function useAdminDashboardPage() {
handleKickUser,
handleViewMeeting,
handleDownloadTranscript,
handleDownloadAudio,
closeMeetingModal,
taskCompletionRate,
};

View File

@ -55,6 +55,16 @@ const STATUS_MAP = {
};
const formatResourcePercent = (value) => `${Number(value || 0).toFixed(1)}%`;
// Render an audio duration (seconds) as "X分Y秒".
// Nullish / missing values yield a placeholder; 0 is a valid duration.
const formatAudioDuration = (duration) => {
  const hasValue = Boolean(duration) || duration === 0;
  if (!hasValue) {
    return '无时长信息';
  }
  const numeric = Number(duration) || 0;
  // Clamp negatives to zero and truncate fractional seconds.
  const totalSeconds = numeric > 0 ? Math.floor(numeric) : 0;
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}分${seconds}秒`;
};
const AdminDashboard = () => {
const {
@ -80,6 +90,7 @@ const AdminDashboard = () => {
handleKickUser,
handleViewMeeting,
handleDownloadTranscript,
handleDownloadAudio,
closeMeetingModal,
taskCompletionRate,
} = useAdminDashboardPage();
@ -167,6 +178,13 @@ const AdminDashboard = () => {
</Space>
),
},
{
title: '关联账号',
dataIndex: 'creator_name',
key: 'creator_name',
width: 140,
render: (text) => text || '-',
},
{
title: '状态',
dataIndex: 'status',
@ -384,14 +402,25 @@ const AdminDashboard = () => {
) : meetingDetails ? (
<Descriptions bordered column={1} size="small">
<Descriptions.Item label="会议名称">{meetingDetails.title}</Descriptions.Item>
<Descriptions.Item label="关联账号">{meetingDetails.creator_account || '-'}</Descriptions.Item>
<Descriptions.Item label="开始时间">
{meetingDetails.meeting_time ? new Date(meetingDetails.meeting_time).toLocaleString() : '-'}
</Descriptions.Item>
<Descriptions.Item label="使用模版">{meetingDetails.prompt_name || '默认模版'}</Descriptions.Item>
<Descriptions.Item label="音频信息">
{meetingDetails.audio_duration
? `${Math.floor(meetingDetails.audio_duration / 60)}${Math.floor(meetingDetails.audio_duration % 60)}`
: '无时长信息'}
<Space size="middle">
<span>{formatAudioDuration(meetingDetails.audio_duration)}</span>
{meetingDetails.audio_file_path ? (
<Button
type="link"
size="small"
style={{ padding: 0 }}
onClick={() => handleDownloadAudio(meetingDetails.meeting_id, meetingDetails.audio_file_path)}
>
下载音频
</Button>
) : null}
</Space>
</Descriptions.Item>
<Descriptions.Item label="操作">
<ActionButton tone="view" variant="textLg" icon={<FileTextOutlined />} onClick={() => handleDownloadTranscript(meetingDetails.meeting_id)}>

View File

@ -165,6 +165,7 @@ const ModelManagement = () => {
provider: values.provider,
endpoint_url: values.endpoint_url,
api_key: values.api_key,
request_timeout_seconds: values.request_timeout_seconds,
hot_word_group_id: values.hot_word_group_id || null,
extra_config: extraConfig,
description: values.description,
@ -233,6 +234,7 @@ const ModelManagement = () => {
endpoint_url: 'https://dashscope.aliyuncs.com/api/v1/services/audio/asr/transcription',
api_key: '',
service_model_name: 'paraformer-v2',
request_timeout_seconds: 300,
hot_word_group_id: undefined,
asr_speaker_count: 10,
asr_language_hints: 'zh,en',
@ -267,24 +269,25 @@ const ModelManagement = () => {
const extraConfig = normalizeAudioExtraConfig(row);
form.setFieldsValue({
...row,
request_timeout_seconds: row.request_timeout_seconds ?? 300,
hot_word_group_id: row.hot_word_group_id || undefined,
service_model_name: row.service_model_name || extraConfig.model || row.asr_model_name || row.model_name,
asr_speaker_count: extraConfig.speaker_count ?? row.asr_speaker_count,
service_model_name: row.service_model_name || extraConfig.model || row.model_name,
asr_speaker_count: extraConfig.speaker_count,
asr_language_hints: Array.isArray(extraConfig.language_hints)
? extraConfig.language_hints.join(',')
: extraConfig.language_hints || row.asr_language_hints,
asr_disfluency_removal_enabled: extraConfig.disfluency_removal_enabled ?? Boolean(row.asr_disfluency_removal_enabled),
asr_diarization_enabled: extraConfig.diarization_enabled ?? Boolean(row.asr_diarization_enabled),
: extraConfig.language_hints,
asr_disfluency_removal_enabled: extraConfig.disfluency_removal_enabled ?? false,
asr_diarization_enabled: extraConfig.diarization_enabled ?? false,
asr_timestamp_alignment_enabled: extraConfig.timestamp_alignment_enabled ?? false,
asr_channel_id: Array.isArray(extraConfig.channel_id) ? extraConfig.channel_id.join(',') : extraConfig.channel_id,
asr_special_word_filter: extraConfig.special_word_filter,
asr_audio_event_detection_enabled: extraConfig.audio_event_detection_enabled ?? false,
asr_phrase_id: extraConfig.phrase_id,
vp_template_text: extraConfig.template_text ?? row.vp_template_text,
vp_duration_seconds: extraConfig.duration_seconds ?? row.vp_duration_seconds,
vp_sample_rate: extraConfig.sample_rate ?? row.vp_sample_rate,
vp_channels: extraConfig.channels ?? row.vp_channels,
vp_max_size_bytes: extraConfig.max_size_bytes ?? row.vp_max_size_bytes,
vp_template_text: extraConfig.template_text,
vp_duration_seconds: extraConfig.duration_seconds,
vp_sample_rate: extraConfig.sample_rate,
vp_channels: extraConfig.channels,
vp_max_size_bytes: extraConfig.max_size_bytes,
});
}
setDrawerOpen(true);
@ -419,10 +422,10 @@ const ModelManagement = () => {
key: 'core',
render: (_, row) => {
const extraConfig = normalizeAudioExtraConfig(row);
const serviceModelName = row.service_model_name || extraConfig.model || row.asr_model_name || row.model_name;
const serviceModelName = row.service_model_name || extraConfig.model || row.model_name;
return row.audio_scene === 'voiceprint'
? `模型=${serviceModelName || '-'} 时长=${extraConfig.duration_seconds || row.vp_duration_seconds || '-'}s 采样=${extraConfig.sample_rate || row.vp_sample_rate || '-'}`
: `模型=${serviceModelName || '-'} 热词组=${row.hot_word_group_name || '未关联'}`;
? `模型=${serviceModelName || '-'} 时长=${extraConfig.duration_seconds || '-'}s 采样=${extraConfig.sample_rate || '-'}`
: `模型=${serviceModelName || '-'} 超时=${row.request_timeout_seconds || 300}s 热词组=${row.hot_word_group_name || '未关联'}`;
},
},
{ title: '状态', dataIndex: 'is_active', key: 'is_active', width: 90, render: (v) => <StatusTag active={v} /> },
@ -628,6 +631,9 @@ const ModelManagement = () => {
</>
) : (
<>
<Form.Item name="request_timeout_seconds" label="转录超时(秒)" rules={[{ required: true, message: '请输入超时秒数' }]}>
<InputNumber min={10} max={3600} />
</Form.Item>
<Form.Item name="hot_word_group_id" label="热词组">
<Select
allowClear