diff --git a/.dockerignore b/.dockerignore index a2e82a0..0d82a62 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,7 +6,14 @@ frontend/node_modules frontend/dist backend/venv -data +data/* +!data/templates/ +!data/templates/** +!data/skills/ +!data/skills/** +!data/model/ +data/model/* +!data/model/README.md workspace **/__pycache__ diff --git a/.env.full.example b/.env.full.example index 5ea02aa..5c7d19b 100644 --- a/.env.full.example +++ b/.env.full.example @@ -1,9 +1,8 @@ # Public exposed port (only nginx is exposed) NGINX_PORT=8080 -# Required absolute host paths. -# They must exist or be creatable by the deployment user and writable by docker daemon. -HOST_DATA_ROOT=/opt/dashboard-nanobot/data +# Project data is always mounted from the repository root `./data`. +# Only workspace root still needs an absolute host path. HOST_BOTS_WORKSPACE_ROOT=/opt/dashboard-nanobot/workspace/bots # Optional custom image tags @@ -72,7 +71,7 @@ WORKSPACE_DOWNLOAD_EXTENSIONS=.pdf,.doc,.docx,.xls,.xlsx,.xlsm,.ppt,.pptx,.odt,. # Local speech-to-text (Whisper via whisper.cpp model file) STT_ENABLED=true STT_MODEL=ggml-small-q8_0.bin -STT_MODEL_DIR=${HOST_DATA_ROOT}/model +STT_MODEL_DIR=/app/data/model STT_DEVICE=cpu STT_MAX_AUDIO_SECONDS=20 STT_DEFAULT_LANGUAGE=zh diff --git a/.env.prod.example b/.env.prod.example index 791f524..c068b26 100644 --- a/.env.prod.example +++ b/.env.prod.example @@ -1,9 +1,8 @@ # Public exposed port (only nginx is exposed) NGINX_PORT=8080 -# REQUIRED absolute host paths. -# They must exist and be writable by docker daemon. -HOST_DATA_ROOT=/opt/dashboard-nanobot/data +# Project data is always mounted from the repository root `./data`. +# Only workspace root still needs an absolute host path. HOST_BOTS_WORKSPACE_ROOT=/opt/dashboard-nanobot/workspace/bots # Optional custom image tags @@ -25,7 +24,7 @@ NPM_REGISTRY=https://registry.npmmirror.com # Database (choose one: SQLite / PostgreSQL / MySQL) # SQLite example: -# DATABASE_URL=sqlite:///${HOST_DATA_ROOT}/nanobot_dashboard.db +# DATABASE_URL=sqlite:////app/data/nanobot_dashboard.db # PostgreSQL example: # DATABASE_URL=postgresql+psycopg://user:password@127.0.0.1:5432/nanobot_dashboard # MySQL example: @@ -65,7 +64,7 @@ WORKSPACE_DOWNLOAD_EXTENSIONS=.pdf,.doc,.docx,.xls,.xlsx,.xlsm,.ppt,.pptx,.odt,. 
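The updated SQLite example in `.env.prod.example` above uses four slashes because the database path is now absolute inside the container (`/app/data/...`), whereas three slashes denote a relative path. A minimal sketch, assuming the backend parses `DATABASE_URL` with SQLAlchemy (suggested by the `postgresql+psycopg` example and the `DATABASE_POOL_*` settings, but not shown in this diff):

```python
from sqlalchemy.engine import make_url

# sqlite:///<path>  -> relative path; sqlite:////<path> -> absolute path.
relative = make_url("sqlite:///data/nanobot_dashboard.db")
absolute = make_url("sqlite:////app/data/nanobot_dashboard.db")

print(relative.database)  # data/nanobot_dashboard.db (relative to the working directory)
print(absolute.database)  # /app/data/nanobot_dashboard.db (the bind-mounted ./data)
```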
# Local speech-to-text (Whisper via whisper.cpp model file) STT_ENABLED=true STT_MODEL=ggml-small-q8_0.bin -STT_MODEL_DIR=${HOST_DATA_ROOT}/model +STT_MODEL_DIR=/app/data/model STT_DEVICE=cpu STT_MAX_AUDIO_SECONDS=20 STT_DEFAULT_LANGUAGE=zh diff --git a/.gitignore b/.gitignore index b58757e..685dd66 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,14 @@ backend/__pycache__/ backend/*.log # Project runtime data (generated locally) -data/ +data/* +!data/templates/ +!data/templates/** +!data/skills/ +!data/skills/** +!data/model/ +data/model/* +!data/model/README.md workspace/ engines/ diff --git a/README.md b/README.md index 7561d82..477af05 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Dashboard Nanobot 是面向 `nanobot` 的控制平面项目,提供镜像管理 - `USER.md` - `TOOLS.md` - `IDENTITY.md` -- 模板管理:系统级模板改为文件化配置(`backend/templates/agent_md_templates.json` 与 `backend/templates/topic_presets.json`)。 +- 模板管理:系统级模板改为文件化配置(`data/templates/agent_md_templates.json` 与 `data/templates/topic_presets.json`)。 - 2D 运维 Dashboard:Bot 列表、启停、命令发送、日志流、遥测。 - UI 全局支持:Light/Dark 切换、中文/English 切换。 @@ -57,6 +57,13 @@ graph TD - 架构设计:`design/architecture.md` - 数据库设计:`design/database.md` +## 默认资源 + +- 项目根目录 `data/templates/` 保存默认模板资源,会在初始化时同步到运行时数据目录。 +- 项目根目录 `data/skills/` 保存默认 skill 包,会在数据库初始化阶段自动注册到 `skill_market_item`。 +- `data/model/` 不包含语音识别模型文件;模型需要用户自行下载放入该目录或 `STT_MODEL_DIR` 指向的目录。 +- 如果语音模型缺失,后端启动时会打印明确告警,但不会阻断服务启动。 + ## 环境变量配置 - 后端: @@ -102,10 +109,10 @@ graph TD 1. 准备部署变量 - 复制 `.env.prod.example` 为 `.env.prod`(位于项目根目录) - - 配置绝对路径: - - `HOST_DATA_ROOT` + - `data/` 会自动映射到宿主机项目根目录下的 `./data` + - 只需要配置绝对路径: - `HOST_BOTS_WORKSPACE_ROOT` - - 如启用本地语音识别,请将 Whisper `.bin` 模型文件放到 `${HOST_DATA_ROOT}/model/` + - 如启用本地语音识别,请将 Whisper `.bin` 模型文件放到宿主机项目根目录的 `data/model/` 并让 `STT_MODEL` 指向完整文件名,例如 `ggml-small-q8_0.bin` - 中国网络建议配置加速项: - `PIP_INDEX_URL`、`PIP_TRUSTED_HOST` @@ -122,6 +129,7 @@ graph TD - `backend` 不开放宿主机端口,仅在内部网络被 Nginx 访问。 - 上传大小使用单一参数 `UPLOAD_MAX_MB` 控制(后端校验 + Nginx 限制)。 - 必须挂载 `/var/run/docker.sock`,否则后端无法操作 Bot 镜像与容器。 +- `data/` 始终绑定到宿主机项目根目录下的 `./data`,其中模板、默认 skills、语音模型和运行数据都落在这里。 - `HOST_BOTS_WORKSPACE_ROOT` 必须是宿主机绝对路径,并且在 `docker-compose.prod.yml` 中以“同路径”挂载到后端容器。 原因:后端通过 Docker API 创建 Bot 容器时,使用的是宿主机可见的 bind 路径。 - 语音识别当前基于 `pywhispercpp==1.3.1` + Whisper `.bin` 模型文件,不使用 `faster-whisper`。 @@ -144,13 +152,13 @@ graph TD 1. 准备部署变量 - 复制 `.env.full.example` 为 `.env.full` + - `data/` 会自动映射到宿主机项目根目录下的 `./data` - 必填修改: - - `HOST_DATA_ROOT` - `HOST_BOTS_WORKSPACE_ROOT` - `POSTGRES_SUPERPASSWORD` - `POSTGRES_APP_PASSWORD` - `PANEL_ACCESS_PASSWORD` - - 如启用本地语音识别,请将 Whisper `.bin` 模型文件放到 `${HOST_DATA_ROOT}/model/` + - 如启用本地语音识别,请将 Whisper `.bin` 模型文件放到宿主机项目根目录的 `data/model/` 2. 启动完整栈 - `./scripts/deploy-full.sh` 3. 
访问 @@ -173,5 +181,5 @@ graph TD ### 注意事项 - `deploy-prod.sh` 和 `deploy-full.sh` 使用的是两套 compose 文件,但复用了相同容器名,不能同时在同一台机器上并行启动。 -- PostgreSQL 数据默认落盘到 `${HOST_DATA_ROOT}/postgres`,Redis 数据默认落盘到 `${HOST_DATA_ROOT}/redis`。 +- PostgreSQL 数据默认落盘到宿主机项目根目录 `./data/postgres`,Redis 数据默认落盘到 `./data/redis`。 - 如果你只想保留前后端容器,继续使用 `deploy-prod.sh`;如果希望把依赖也打包进来,使用 `deploy-full.sh`。 diff --git a/backend/Dockerfile b/backend/Dockerfile index a96c58c..c5220c1 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -20,6 +20,7 @@ RUN if [ -n "${PIP_INDEX_URL}" ]; then pip config set global.index-url "${PIP_IN && pip install -r requirements.txt COPY backend/ /app/backend/ +COPY data/ /app/data/ EXPOSE 8000 diff --git a/backend/api/system_router.py b/backend/api/system_router.py index 6de2288..b9f9340 100644 --- a/backend/api/system_router.py +++ b/backend/api/system_router.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, HTTPException +from core.speech_service import inspect_speech_model_status from core.utils import _get_default_system_timezone from schemas.system import SystemTemplatesUpdateRequest from services.platform_service import get_platform_settings_snapshot, get_speech_runtime_settings @@ -17,6 +18,7 @@ def get_system_defaults(): md_templates = get_agent_md_templates() platform_settings = get_platform_settings_snapshot() speech_settings = get_speech_runtime_settings() + model_status = inspect_speech_model_status() return { "templates": md_templates, "limits": { @@ -41,6 +43,9 @@ def get_system_defaults(): "device": speech_settings["device"], "max_audio_seconds": speech_settings["max_audio_seconds"], "default_language": speech_settings["default_language"], + "ready": model_status["ready"], + "message": model_status["message"], + "expected_path": model_status["expected_path"], }, } diff --git a/backend/bootstrap/app_runtime.py b/backend/bootstrap/app_runtime.py index 6922840..f341c3b 100644 --- a/backend/bootstrap/app_runtime.py +++ b/backend/bootstrap/app_runtime.py @@ -6,10 +6,12 @@ from sqlmodel import Session, select from core.cache import cache from core.database import engine, init_database from core.docker_instance import docker_manager +from core.speech_service import inspect_speech_model_status from core.settings import DATABASE_URL_DISPLAY, REDIS_ENABLED from models.bot import BotInstance from services.bot_storage_service import _migrate_bot_resources_store -from services.platform_service import prune_expired_activity_events +from services.default_assets_service import ensure_default_skill_market_items, ensure_runtime_data_assets +from services.platform_service import get_speech_runtime_settings, prune_expired_activity_events from services.runtime_service import docker_callback, set_main_loop @@ -29,11 +31,31 @@ def register_app_runtime(app: FastAPI) -> None: current_loop = asyncio.get_running_loop() app.state.main_loop = current_loop set_main_loop(current_loop) + asset_report = ensure_runtime_data_assets() + if asset_report["templates_initialized"] or asset_report["skills_synchronized"]: + print( + "[init] 默认资源已同步 " + f"(templates={asset_report['templates_initialized']}, skills={asset_report['skills_synchronized']})" + ) init_database() with Session(engine) as session: + skill_report = ensure_default_skill_market_items(session) + if skill_report["created"] or skill_report["updated"]: + print( + "[init] 默认 skills 已入库 " + f"(created={len(skill_report['created'])}, updated={len(skill_report['updated'])})" + ) prune_expired_activity_events(session, force=True) bots = 
session.exec(select(BotInstance)).all() for bot in bots: _migrate_bot_resources_store(bot.id) docker_manager.ensure_monitor(bot.id, docker_callback) + speech_settings = get_speech_runtime_settings() + model_status = inspect_speech_model_status() + if speech_settings["enabled"]: + if model_status["ready"]: + print(f"🎙️ 语音识别模型就绪: {model_status['resolved_path']}") + else: + hint = f",请将模型文件放到 {model_status['expected_path']}" if model_status["expected_path"] else "" + print(f"⚠️ 语音识别模型未就绪: {model_status['message']}{hint}") print("✅ 启动自检完成") diff --git a/backend/core/settings.py b/backend/core/settings.py index daeb06c..1da5c9f 100644 --- a/backend/core/settings.py +++ b/backend/core/settings.py @@ -117,6 +117,13 @@ DATA_ROOT: Final[str] = _normalize_dir_path(os.getenv("DATA_ROOT", str(PROJECT_R BOTS_WORKSPACE_ROOT: Final[str] = _normalize_dir_path( os.getenv("BOTS_WORKSPACE_ROOT", str(PROJECT_ROOT / "workspace" / "bots")) ) +BUNDLED_DATA_ROOT: Final[Path] = (PROJECT_ROOT / "data").resolve() +RUNTIME_DATA_ROOT: Final[Path] = Path(DATA_ROOT).resolve() +BUNDLED_TEMPLATES_ROOT: Final[Path] = (BUNDLED_DATA_ROOT / "templates").resolve() +RUNTIME_TEMPLATES_ROOT: Final[Path] = (RUNTIME_DATA_ROOT / "templates").resolve() +BUNDLED_SKILLS_ROOT: Final[Path] = (BUNDLED_DATA_ROOT / "skills").resolve() +RUNTIME_SKILLS_ROOT: Final[Path] = (RUNTIME_DATA_ROOT / "skills").resolve() +RUNTIME_MODEL_ROOT: Final[Path] = (RUNTIME_DATA_ROOT / "model").resolve() def _normalize_database_url(url: str) -> str: @@ -205,7 +212,7 @@ DEFAULT_WORKSPACE_DOWNLOAD_EXTENSIONS: Final[tuple[str, ...]] = ( ) STT_ENABLED_DEFAULT: Final[bool] = True STT_MODEL: Final[str] = str(os.getenv("STT_MODEL") or "ggml-small-q8_0.bin").strip() -_DEFAULT_STT_MODEL_DIR: Final[Path] = (Path(DATA_ROOT) / "model").resolve() +_DEFAULT_STT_MODEL_DIR: Final[Path] = RUNTIME_MODEL_ROOT _configured_stt_model_dir = _normalize_dir_path(os.getenv("STT_MODEL_DIR", str(_DEFAULT_STT_MODEL_DIR))) if _configured_stt_model_dir and not Path(_configured_stt_model_dir).exists() and _DEFAULT_STT_MODEL_DIR.exists(): STT_MODEL_DIR: Final[str] = str(_DEFAULT_STT_MODEL_DIR) @@ -240,6 +247,7 @@ APP_HOST: Final[str] = str(os.getenv("APP_HOST") or "0.0.0.0").strip() APP_PORT: Final[int] = _env_int("APP_PORT", 8000, 1, 65535) APP_RELOAD: Final[bool] = _env_bool("APP_RELOAD", False) -TEMPLATE_ROOT: Final[Path] = (BACKEND_ROOT / "templates").resolve() -AGENT_MD_TEMPLATES_FILE: Final[Path] = TEMPLATE_ROOT / "agent_md_templates.json" -TOPIC_PRESETS_TEMPLATES_FILE: Final[Path] = TEMPLATE_ROOT / "topic_presets.json" +AGENT_MD_TEMPLATES_FILE: Final[Path] = RUNTIME_TEMPLATES_ROOT / "agent_md_templates.json" +TOPIC_PRESETS_TEMPLATES_FILE: Final[Path] = RUNTIME_TEMPLATES_ROOT / "topic_presets.json" +BUNDLED_AGENT_MD_TEMPLATES_FILE: Final[Path] = BUNDLED_TEMPLATES_ROOT / "agent_md_templates.json" +BUNDLED_TOPIC_PRESETS_TEMPLATES_FILE: Final[Path] = BUNDLED_TEMPLATES_ROOT / "topic_presets.json" diff --git a/backend/core/speech_service.py b/backend/core/speech_service.py index 6cee286..110fd6e 100644 --- a/backend/core/speech_service.py +++ b/backend/core/speech_service.py @@ -24,6 +24,39 @@ class SpeechDurationError(SpeechServiceError): pass +def inspect_speech_model_status() -> Dict[str, Any]: + service = WhisperSpeechService() + model = str(STT_MODEL or "").strip() + model_dir = str(STT_MODEL_DIR or "").strip() + expected_path = "" + + if model: + if any(sep in model for sep in ("/", "\\")): + expected_path = str(Path(model).expanduser()) + elif model_dir: + expected_path = 
str((Path(model_dir).expanduser() / model).resolve()) + + try: + resolved_path = service._resolve_model_source() + return { + "ready": True, + "model": model, + "model_dir": model_dir, + "expected_path": expected_path or resolved_path, + "resolved_path": resolved_path, + "message": "", + } + except SpeechServiceError as exc: + return { + "ready": False, + "model": model, + "model_dir": model_dir, + "expected_path": expected_path, + "resolved_path": "", + "message": str(exc), + } + + class WhisperSpeechService: def __init__(self) -> None: self._model: Any = None diff --git a/backend/services/default_assets_service.py b/backend/services/default_assets_service.py new file mode 100644 index 0000000..f9a49ef --- /dev/null +++ b/backend/services/default_assets_service.py @@ -0,0 +1,206 @@ +from __future__ import annotations + +import json +import os +import re +import shutil +import zipfile +from pathlib import Path +from typing import Any, Dict, List + +from sqlmodel import Session, select + +from core.settings import ( + AGENT_MD_TEMPLATES_FILE, + BUNDLED_AGENT_MD_TEMPLATES_FILE, + BUNDLED_SKILLS_ROOT, + BUNDLED_TOPIC_PRESETS_TEMPLATES_FILE, + DATA_ROOT, + RUNTIME_MODEL_ROOT, + RUNTIME_SKILLS_ROOT, + RUNTIME_TEMPLATES_ROOT, + TOPIC_PRESETS_TEMPLATES_FILE, +) +from core.utils import ( + _is_ignored_skill_zip_top_level, + _is_valid_top_level_skill_name, + _read_description_from_text, + _sanitize_skill_market_key, +) +from models.skill import SkillMarketItem + + +def _copy_if_missing(src: Path, dst: Path) -> bool: + if not src.exists() or not src.is_file(): + return False + if src.resolve() == dst.resolve() if dst.exists() else False: + return False + if dst.exists(): + return False + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + return True + + +def _copy_if_different(src: Path, dst: Path) -> bool: + if not src.exists() or not src.is_file(): + return False + if src.resolve() == dst.resolve() if dst.exists() else False: + return False + dst.parent.mkdir(parents=True, exist_ok=True) + if dst.exists(): + try: + if src.stat().st_size == dst.stat().st_size and src.read_bytes() == dst.read_bytes(): + return False + except Exception: + pass + shutil.copy2(src, dst) + return True + + +def _iter_bundled_skill_packages() -> List[Path]: + if not BUNDLED_SKILLS_ROOT.exists() or not BUNDLED_SKILLS_ROOT.is_dir(): + return [] + return sorted(path for path in BUNDLED_SKILLS_ROOT.iterdir() if path.is_file() and path.suffix.lower() == ".zip") + + +def ensure_runtime_data_assets() -> Dict[str, int]: + Path(DATA_ROOT).mkdir(parents=True, exist_ok=True) + RUNTIME_TEMPLATES_ROOT.mkdir(parents=True, exist_ok=True) + RUNTIME_SKILLS_ROOT.mkdir(parents=True, exist_ok=True) + RUNTIME_MODEL_ROOT.mkdir(parents=True, exist_ok=True) + + templates_initialized = 0 + skills_synchronized = 0 + + if _copy_if_missing(BUNDLED_AGENT_MD_TEMPLATES_FILE, AGENT_MD_TEMPLATES_FILE): + templates_initialized += 1 + if _copy_if_missing(BUNDLED_TOPIC_PRESETS_TEMPLATES_FILE, TOPIC_PRESETS_TEMPLATES_FILE): + templates_initialized += 1 + + for src in _iter_bundled_skill_packages(): + if _copy_if_different(src, RUNTIME_SKILLS_ROOT / src.name): + skills_synchronized += 1 + + return { + "templates_initialized": templates_initialized, + "skills_synchronized": skills_synchronized, + } + + +def _extract_skill_zip_summary(zip_path: Path) -> Dict[str, Any]: + entry_names: List[str] = [] + description = "" + with zipfile.ZipFile(zip_path) as archive: + members = archive.infolist() + file_members = [member for member in members 
if not member.is_dir()] + for member in file_members: + raw_name = str(member.filename or "").replace("\\", "/").lstrip("/") + if not raw_name: + continue + first = raw_name.split("/", 1)[0].strip() + if _is_ignored_skill_zip_top_level(first): + continue + if _is_valid_top_level_skill_name(first) and first not in entry_names: + entry_names.append(first) + + candidates = sorted( + [ + str(member.filename or "").replace("\\", "/").lstrip("/") + for member in file_members + if str(member.filename or "").replace("\\", "/").rsplit("/", 1)[-1].lower() + in {"skill.md", "readme.md"} + ], + key=lambda value: (value.count("/"), value.lower()), + ) + for candidate in candidates: + try: + with archive.open(candidate, "r") as file: + preview = file.read(4096).decode("utf-8", errors="ignore") + description = _read_description_from_text(preview) + if description: + break + except Exception: + continue + return { + "entry_names": entry_names, + "description": description, + } + + +def _default_display_name(stem: str) -> str: + chunks = [chunk for chunk in re.split(r"[-_]+", str(stem or "").strip()) if chunk] + if not chunks: + return "Skill" + return " ".join(chunk.upper() if chunk.isupper() else chunk.capitalize() for chunk in chunks) + + +def _resolve_unique_skill_key(existing_keys: set[str], preferred_key: str) -> str: + base_key = _sanitize_skill_market_key(preferred_key) or "skill" + candidate = base_key + counter = 2 + while candidate in existing_keys: + candidate = f"{base_key}-{counter}" + counter += 1 + existing_keys.add(candidate) + return candidate + + +def ensure_default_skill_market_items(session: Session) -> Dict[str, List[str]]: + report: Dict[str, List[str]] = {"created": [], "updated": []} + default_packages = _iter_bundled_skill_packages() + if not default_packages: + return report + + rows = session.exec(select(SkillMarketItem)).all() + existing_by_zip = {str(row.zip_filename or "").strip(): row for row in rows if str(row.zip_filename or "").strip()} + existing_keys = {str(row.skill_key or "").strip() for row in rows if str(row.skill_key or "").strip()} + + for bundled_path in default_packages: + runtime_path = RUNTIME_SKILLS_ROOT / bundled_path.name + source_path = runtime_path if runtime_path.exists() else bundled_path + try: + summary = _extract_skill_zip_summary(source_path) + except Exception: + continue + + zip_filename = bundled_path.name + entry_names_json = json.dumps(summary["entry_names"], ensure_ascii=False) + display_name = _default_display_name((summary["entry_names"] or [bundled_path.stem])[0]) + zip_size_bytes = int(source_path.stat().st_size) if source_path.exists() else 0 + row = existing_by_zip.get(zip_filename) + + if row is None: + row = SkillMarketItem( + skill_key=_resolve_unique_skill_key(existing_keys, bundled_path.stem), + display_name=display_name, + description=str(summary["description"] or "").strip(), + zip_filename=zip_filename, + zip_size_bytes=zip_size_bytes, + entry_names_json=entry_names_json, + ) + session.add(row) + existing_by_zip[zip_filename] = row + report["created"].append(zip_filename) + continue + + changed = False + if int(row.zip_size_bytes or 0) != zip_size_bytes: + row.zip_size_bytes = zip_size_bytes + changed = True + if str(row.entry_names_json or "") != entry_names_json: + row.entry_names_json = entry_names_json + changed = True + if not str(row.display_name or "").strip(): + row.display_name = display_name + changed = True + if not str(row.description or "").strip() and str(summary["description"] or "").strip(): + row.description = 
str(summary["description"] or "").strip() + changed = True + if changed: + report["updated"].append(zip_filename) + + if report["created"] or report["updated"]: + session.commit() + + return report diff --git a/backend/services/template_service.py b/backend/services/template_service.py index 0ec0070..224b4ab 100644 --- a/backend/services/template_service.py +++ b/backend/services/template_service.py @@ -2,21 +2,30 @@ from __future__ import annotations from typing import Any, Dict, List -from core.settings import AGENT_MD_TEMPLATES_FILE, TOPIC_PRESETS_TEMPLATES_FILE +from core.settings import ( + AGENT_MD_TEMPLATES_FILE, + BUNDLED_AGENT_MD_TEMPLATES_FILE, + BUNDLED_TOPIC_PRESETS_TEMPLATES_FILE, + TOPIC_PRESETS_TEMPLATES_FILE, +) TEMPLATE_KEYS = ("agents_md", "soul_md", "user_md", "tools_md", "identity_md") -def _load_json_object(path: str) -> Dict[str, Any]: +def _load_json_object(path: str, fallback_path: str = "") -> Dict[str, Any]: import json - try: - with open(path, "r", encoding="utf-8") as file: - data = json.load(file) - if isinstance(data, dict): - return data - except Exception: - pass + for candidate in [path, fallback_path]: + candidate = str(candidate or "").strip() + if not candidate: + continue + try: + with open(candidate, "r", encoding="utf-8") as file: + data = json.load(file) + if isinstance(data, dict): + return data + except Exception: + continue return {} @@ -36,12 +45,12 @@ def _write_json_atomic(path: str, payload: Dict[str, Any]) -> None: def get_agent_md_templates() -> Dict[str, str]: - raw = _load_json_object(str(AGENT_MD_TEMPLATES_FILE)) + raw = _load_json_object(str(AGENT_MD_TEMPLATES_FILE), str(BUNDLED_AGENT_MD_TEMPLATES_FILE)) return {key: _normalize_md_text(raw.get(key)) for key in TEMPLATE_KEYS} def get_topic_presets() -> Dict[str, Any]: - raw = _load_json_object(str(TOPIC_PRESETS_TEMPLATES_FILE)) + raw = _load_json_object(str(TOPIC_PRESETS_TEMPLATES_FILE), str(BUNDLED_TOPIC_PRESETS_TEMPLATES_FILE)) presets = raw.get("presets") if not isinstance(presets, list): return {"presets": []} @@ -68,4 +77,3 @@ def update_topic_presets(raw: Dict[str, Any]) -> Dict[str, Any]: def get_agent_template_value(key: str) -> str: return get_agent_md_templates().get(key, "") - diff --git a/data/model/README.md b/data/model/README.md new file mode 100644 index 0000000..7ed4700 --- /dev/null +++ b/data/model/README.md @@ -0,0 +1,9 @@ +# Speech Model Directory + +This directory is reserved for local Whisper `.bin` model files and is intentionally not committed to source control. + +Put the file configured by `STT_MODEL` here, for example: + +- `ggml-small-q8_0.bin` + +If the model file is missing, the backend will still start, but it will print a startup warning and speech transcription requests will not work until the file is added. 
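Whether the model file described in `data/model/README.md` is actually in place can be checked with the helper this change adds in `backend/core/speech_service.py`. A minimal usage sketch, runnable only inside the backend environment where `core` is importable; the field names are taken from the diff above:

```python
from core.speech_service import inspect_speech_model_status

status = inspect_speech_model_status()
if status["ready"]:
    print(f"speech model ready: {status['resolved_path']}")
else:
    # Same information the startup warning prints: the reason, plus where to drop the .bin file.
    print(f"speech model not ready: {status['message']}")
    if status["expected_path"]:
        print(f"expected location: {status['expected_path']}")
```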
diff --git a/data/skills/nano-banana-pro.zip b/data/skills/nano-banana-pro.zip new file mode 100644 index 0000000..f81ade6 Binary files /dev/null and b/data/skills/nano-banana-pro.zip differ diff --git a/data/skills/powerpoint-pptx.zip b/data/skills/powerpoint-pptx.zip new file mode 100644 index 0000000..5fa24b7 Binary files /dev/null and b/data/skills/powerpoint-pptx.zip differ diff --git a/data/skills/self-improving-agent.zip b/data/skills/self-improving-agent.zip new file mode 100644 index 0000000..777c24e Binary files /dev/null and b/data/skills/self-improving-agent.zip differ diff --git a/data/skills/stock-analysis.zip b/data/skills/stock-analysis.zip new file mode 100644 index 0000000..0523962 Binary files /dev/null and b/data/skills/stock-analysis.zip differ diff --git a/data/skills/tavily-search.zip b/data/skills/tavily-search.zip new file mode 100644 index 0000000..a64d2a3 Binary files /dev/null and b/data/skills/tavily-search.zip differ diff --git a/data/skills/writing.zip b/data/skills/writing.zip new file mode 100644 index 0000000..84b2155 Binary files /dev/null and b/data/skills/writing.zip differ diff --git a/data/templates/agent_md_templates.json b/data/templates/agent_md_templates.json new file mode 100644 index 0000000..0160d18 --- /dev/null +++ b/data/templates/agent_md_templates.json @@ -0,0 +1,7 @@ +{ + "agents_md": "- 优先完成任务目标\n- 操作前先说明意图\n- 输出必须可执行\n\n## 默认输出规范\n\n- 每次执行任务时,在 workspace 中创建新目录保存本次输出。\n- 输出内容默认采用 Markdown(.md)格式。\n- 最终报告需求.md和.htm双格式。", + "soul_md": "你是专业的企业数字员工,表达清晰、可执行。", + "user_md": "- 语言: 中文\n- 风格: 专业\n- 偏好: 简明且有步骤", + "tools_md": "- 谨慎使用 shell\n- 修改文件后复核\n- 失败时说明原因并重试策略", + "identity_md": "- 角色: 企业数字员工\n- 领域: 运维与任务执行" +} diff --git a/data/templates/topic_presets.json b/data/templates/topic_presets.json new file mode 100644 index 0000000..78c235c --- /dev/null +++ b/data/templates/topic_presets.json @@ -0,0 +1,134 @@ +{ + "presets": [ + { + "id": "politics", + "topic_key": "politics_news", + "name": "时政新闻", + "description": "沉淀国内外时政动态、政策发布与重大公共治理事件,便于集中查看。", + "routing_purpose": "收录与政府决策、政策法规、外交事务及公共治理相关的关键信息。", + "routing_include_when": [ + "时政", + "政策", + "法规", + "国务院", + "政府", + "部委", + "人大", + "政协", + "外交", + "国际关系", + "白宫", + "总统", + "议会", + "election", + "policy" + ], + "routing_exclude_when": [ + "娱乐", + "明星", + "综艺", + "体育", + "游戏", + "购物", + "种草", + "广告" + ], + "routing_examples_positive": [ + "国务院发布新一轮宏观政策措施。", + "外交部就国际热点事件发布声明。", + "某国总统宣布新的对外政策方向。" + ], + "routing_examples_negative": [ + "某明星新剧开播引发热议。", + "某球队转会新闻与赛果分析。", + "数码产品促销与购物推荐汇总。" + ], + "routing_priority": 85 + }, + { + "id": "finance", + "topic_key": "finance_market", + "name": "财经信息", + "description": "聚合宏观经济、市场波动、公司财报与监管政策等财经信息。", + "routing_purpose": "沉淀与资本市场、行业景气、资产价格相关的关键结论与风险提示。", + "routing_include_when": [ + "财经", + "金融", + "股市", + "A股", + "港股", + "美股", + "债券", + "汇率", + "利率", + "通胀", + "GDP", + "财报", + "央行", + "market", + "earnings" + ], + "routing_exclude_when": [ + "娱乐", + "体育", + "游戏", + "影视", + "八卦", + "生活方式", + "旅行攻略" + ], + "routing_examples_positive": [ + "央行公布最新利率决议并释放政策信号。", + "上市公司发布季度财报并上调全年指引。", + "美元指数走强导致主要货币普遍承压。" + ], + "routing_examples_negative": [ + "某综艺节目收视排名变化。", + "某球员转会传闻引发讨论。", + "新游上线玩法测评。" + ], + "routing_priority": 80 + }, + { + "id": "tech", + "topic_key": "tech_updates", + "name": "技术资讯", + "description": "追踪 AI、云计算、开源社区与开发工具链的最新技术资讯。", + "routing_purpose": "沉淀技术发布、版本升级、兼容性变更与工程实践建议。", + "routing_include_when": [ + "技术", + "开源", + "AI", + "模型", + "大语言模型", + "MCP", + "API", + "SDK", + "发布", + "版本", + "升级", + "breaking change", + 
"security advisory" + ], + "routing_exclude_when": [ + "娱乐", + "体育", + "美食", + "旅游", + "情感", + "八卦" + ], + "routing_examples_positive": [ + "某主流框架发布新版本并调整默认配置。", + "开源项目披露高危安全漏洞并给出修复方案。", + "AI 模型服务更新 API,返回结构发生变化。" + ], + "routing_examples_negative": [ + "某艺人参加活动造型盘点。", + "旅游目的地打卡攻略合集。", + "比赛结果预测与竞猜。" + ], + "routing_priority": 75 + } + ] +} diff --git a/docker-compose.full.yml b/docker-compose.full.yml index ed81450..6617ccf 100644 --- a/docker-compose.full.yml +++ b/docker-compose.full.yml @@ -9,7 +9,7 @@ services: POSTGRES_PASSWORD: ${POSTGRES_SUPERPASSWORD:?POSTGRES_SUPERPASSWORD is required} POSTGRES_DB: ${POSTGRES_BOOTSTRAP_DB:-postgres} volumes: - - ${HOST_DATA_ROOT}/postgres:/var/lib/postgresql/data + - ./data/postgres:/var/lib/postgresql/data expose: - "5432" healthcheck: @@ -32,7 +32,7 @@ services: TZ: ${TZ:-Asia/Shanghai} command: ["redis-server", "--appendonly", "yes", "--save", "60", "1000"] volumes: - - ${HOST_DATA_ROOT}/redis:/data + - ./data/redis:/data expose: - "6379" healthcheck: @@ -74,7 +74,7 @@ services: DATABASE_POOL_TIMEOUT: ${DATABASE_POOL_TIMEOUT:-30} DATABASE_POOL_RECYCLE: ${DATABASE_POOL_RECYCLE:-1800} UPLOAD_MAX_MB: ${UPLOAD_MAX_MB:-100} - DATA_ROOT: ${HOST_DATA_ROOT} + DATA_ROOT: /app/data BOTS_WORKSPACE_ROOT: ${HOST_BOTS_WORKSPACE_ROOT} DATABASE_URL: postgresql+psycopg://${POSTGRES_APP_USER}:${POSTGRES_APP_PASSWORD}@postgres:5432/${POSTGRES_APP_DB} REDIS_ENABLED: ${REDIS_ENABLED:-true} @@ -89,7 +89,7 @@ services: WORKSPACE_DOWNLOAD_EXTENSIONS: ${WORKSPACE_DOWNLOAD_EXTENSIONS:-} STT_ENABLED: ${STT_ENABLED:-true} STT_MODEL: ${STT_MODEL:-ggml-small-q8_0.bin} - STT_MODEL_DIR: ${STT_MODEL_DIR:-${HOST_DATA_ROOT}/model} + STT_MODEL_DIR: ${STT_MODEL_DIR:-/app/data/model} STT_DEVICE: ${STT_DEVICE:-cpu} STT_MAX_AUDIO_SECONDS: ${STT_MAX_AUDIO_SECONDS:-20} STT_DEFAULT_LANGUAGE: ${STT_DEFAULT_LANGUAGE:-zh} @@ -99,7 +99,7 @@ services: STT_INITIAL_PROMPT: ${STT_INITIAL_PROMPT:-以下内容可能包含简体中文和英文术语。请优先输出简体中文,英文单词、缩写、品牌名和数字保持原文,不要翻译。} volumes: - /var/run/docker.sock:/var/run/docker.sock - - ${HOST_DATA_ROOT}:${HOST_DATA_ROOT} + - ./data:/app/data - ${HOST_BOTS_WORKSPACE_ROOT}:${HOST_BOTS_WORKSPACE_ROOT} expose: - "8000" diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index fdcd863..57fd88f 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -20,7 +20,7 @@ services: DATABASE_POOL_TIMEOUT: ${DATABASE_POOL_TIMEOUT:-30} DATABASE_POOL_RECYCLE: ${DATABASE_POOL_RECYCLE:-1800} UPLOAD_MAX_MB: ${UPLOAD_MAX_MB:-100} - DATA_ROOT: ${HOST_DATA_ROOT} + DATA_ROOT: /app/data BOTS_WORKSPACE_ROOT: ${HOST_BOTS_WORKSPACE_ROOT} DATABASE_URL: ${DATABASE_URL:-} REDIS_ENABLED: ${REDIS_ENABLED:-false} @@ -33,7 +33,7 @@ services: PANEL_ACCESS_PASSWORD: ${PANEL_ACCESS_PASSWORD:-} STT_ENABLED: ${STT_ENABLED:-true} STT_MODEL: ${STT_MODEL:-ggml-small-q8_0.bin} - STT_MODEL_DIR: ${STT_MODEL_DIR:-${HOST_DATA_ROOT}/model} + STT_MODEL_DIR: ${STT_MODEL_DIR:-/app/data/model} STT_DEVICE: ${STT_DEVICE:-cpu} STT_MAX_AUDIO_SECONDS: ${STT_MAX_AUDIO_SECONDS:-20} STT_DEFAULT_LANGUAGE: ${STT_DEFAULT_LANGUAGE:-zh} @@ -43,7 +43,7 @@ services: STT_INITIAL_PROMPT: ${STT_INITIAL_PROMPT:-以下内容可能包含简体中文和英文术语。请优先输出简体中文,英文单词、缩写、品牌名和数字保持原文,不要翻译。} volumes: - /var/run/docker.sock:/var/run/docker.sock - - ${HOST_DATA_ROOT}:${HOST_DATA_ROOT} + - ./data:/app/data - ${HOST_BOTS_WORKSPACE_ROOT}:${HOST_BOTS_WORKSPACE_ROOT} expose: - "8000" diff --git a/scripts/deploy-full.sh b/scripts/deploy-full.sh index 48025cd..a6b38d1 100755 --- a/scripts/deploy-full.sh +++ b/scripts/deploy-full.sh @@ -4,6 +4,7 @@ set 
-euo pipefail ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" ENV_FILE="${1:-$ROOT_DIR/.env.full}" COMPOSE_FILE="$ROOT_DIR/docker-compose.full.yml" +DATA_DIR="$ROOT_DIR/data" require_file() { local path="$1" @@ -89,7 +90,6 @@ wait_for_health() { require_file "$ENV_FILE" "Create it from: $ROOT_DIR/.env.full.example" require_file "$COMPOSE_FILE" "" -load_env_var HOST_DATA_ROOT load_env_var HOST_BOTS_WORKSPACE_ROOT load_env_var POSTGRES_SUPERUSER postgres load_env_var POSTGRES_SUPERPASSWORD @@ -99,7 +99,6 @@ load_env_var POSTGRES_APP_USER load_env_var POSTGRES_APP_PASSWORD load_env_var NGINX_PORT 8080 -require_env HOST_DATA_ROOT require_env HOST_BOTS_WORKSPACE_ROOT require_env POSTGRES_SUPERUSER require_env POSTGRES_SUPERPASSWORD @@ -111,10 +110,10 @@ require_env NGINX_PORT echo "[deploy-full] using env: $ENV_FILE" mkdir -p \ - "$HOST_DATA_ROOT" \ - "$HOST_DATA_ROOT/postgres" \ - "$HOST_DATA_ROOT/redis" \ - "$HOST_DATA_ROOT/model" \ + "$DATA_DIR" \ + "$DATA_DIR/postgres" \ + "$DATA_DIR/redis" \ + "$DATA_DIR/model" \ "$HOST_BOTS_WORKSPACE_ROOT" docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" config -q diff --git a/scripts/deploy-prod.sh b/scripts/deploy-prod.sh index 8c992aa..6cd98aa 100755 --- a/scripts/deploy-prod.sh +++ b/scripts/deploy-prod.sh @@ -3,6 +3,7 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" ENV_FILE="${1:-$ROOT_DIR/.env.prod}" +DATA_DIR="$ROOT_DIR/data" if [[ ! -f "$ENV_FILE" ]]; then echo "Missing env file: $ENV_FILE" @@ -11,6 +12,7 @@ if [[ ! -f "$ENV_FILE" ]]; then fi echo "[deploy] using env: $ENV_FILE" +mkdir -p "$DATA_DIR" "$DATA_DIR/model" docker compose --env-file "$ENV_FILE" -f "$ROOT_DIR/docker-compose.prod.yml" config -q docker compose --env-file "$ENV_FILE" -f "$ROOT_DIR/docker-compose.prod.yml" up -d --build
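After either deploy script brings the stack up, the new `./data` → `/app/data` bind mount can be sanity-checked from the host. A sketch using the Docker SDK for Python; the container name `dashboard-nanobot-backend` is a placeholder, since the actual `container_name` used by the compose files is not shown in this diff:

```python
import docker

client = docker.from_env()
# Placeholder name: substitute the backend container name reported by `docker compose ps`.
backend = client.containers.get("dashboard-nanobot-backend")

for mount in backend.attrs["Mounts"]:
    if mount.get("Destination") == "/app/data":
        print(f"./data is mounted from {mount.get('Source')}")
        break
else:
    print("no /app/data mount found; check the compose volumes section")
```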