v0.1.5
parent
ad2af1e71f
commit
aeaaa4fde5
|
|
@ -11,6 +11,9 @@ import docker
|
||||||
|
|
||||||
|
|
||||||
class BotDockerManager:
|
class BotDockerManager:
|
||||||
|
_RUNTIME_BOOTSTRAP_LABEL_KEY = "dashboard.runtime_bootstrap"
|
||||||
|
_RUNTIME_BOOTSTRAP_LABEL_VALUE = "env-json-v1"
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
host_data_root: str,
|
host_data_root: str,
|
||||||
|
|
@ -180,6 +183,136 @@ class BotDockerManager:
|
||||||
|
|
||||||
return str(network_settings.get("IPAddress") or "").strip()
|
return str(network_settings.get("IPAddress") or "").strip()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _container_uses_expected_bootstrap(cls, container: Any) -> bool:
|
||||||
|
attrs = getattr(container, "attrs", {}) or {}
|
||||||
|
config = attrs.get("Config") or {}
|
||||||
|
labels = config.get("Labels") or {}
|
||||||
|
return str(labels.get(cls._RUNTIME_BOOTSTRAP_LABEL_KEY) or "").strip() == cls._RUNTIME_BOOTSTRAP_LABEL_VALUE
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _runtime_bootstrap_entrypoint() -> List[str]:
|
||||||
|
bootstrap_code = "\n".join(
|
||||||
|
[
|
||||||
|
"import json",
|
||||||
|
"import os",
|
||||||
|
"import pathlib",
|
||||||
|
"import re",
|
||||||
|
"",
|
||||||
|
"path = pathlib.Path('/root/.nanobot/env.json')",
|
||||||
|
"pattern = re.compile(r'^[A-Z_][A-Z0-9_]{0,127}$')",
|
||||||
|
"data = {}",
|
||||||
|
"if path.is_file():",
|
||||||
|
" try:",
|
||||||
|
" data = json.loads(path.read_text(encoding='utf-8'))",
|
||||||
|
" except Exception:",
|
||||||
|
" data = {}",
|
||||||
|
"if not isinstance(data, dict):",
|
||||||
|
" data = {}",
|
||||||
|
"for raw_key, raw_value in data.items():",
|
||||||
|
" key = str(raw_key or '').strip().upper()",
|
||||||
|
" if not pattern.fullmatch(key):",
|
||||||
|
" continue",
|
||||||
|
" os.environ[key] = str(raw_value or '').strip()",
|
||||||
|
"os.execvp('nanobot', ['nanobot', 'gateway'])",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return [
|
||||||
|
"python",
|
||||||
|
"-c",
|
||||||
|
bootstrap_code,
|
||||||
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _container_has_mount(container: Any, source: str, destination: str) -> bool:
|
||||||
|
attrs = getattr(container, "attrs", {}) or {}
|
||||||
|
mounts = attrs.get("Mounts") or []
|
||||||
|
expected_source = os.path.normpath(source)
|
||||||
|
expected_destination = str(destination or "").strip()
|
||||||
|
for mount in mounts:
|
||||||
|
if not isinstance(mount, dict):
|
||||||
|
continue
|
||||||
|
current_source = os.path.normpath(str(mount.get("Source") or ""))
|
||||||
|
current_destination = str(mount.get("Destination") or "").strip()
|
||||||
|
if current_source != expected_source or current_destination != expected_destination:
|
||||||
|
continue
|
||||||
|
if mount.get("RW") is False:
|
||||||
|
continue
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _desired_memory_bytes(memory_mb: int) -> int:
|
||||||
|
return int(memory_mb) * 1024 * 1024 if int(memory_mb or 0) > 0 else 0
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _desired_storage_bytes(storage_gb: int) -> Optional[int]:
|
||||||
|
storage = int(storage_gb or 0)
|
||||||
|
if storage <= 0:
|
||||||
|
return None
|
||||||
|
return storage * 1024 * 1024 * 1024
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_container_cpu_cores(container: Any) -> float:
|
||||||
|
attrs = getattr(container, "attrs", {}) or {}
|
||||||
|
host_cfg = attrs.get("HostConfig") or {}
|
||||||
|
nano_cpus = int(host_cfg.get("NanoCpus") or 0)
|
||||||
|
if nano_cpus > 0:
|
||||||
|
return nano_cpus / 1_000_000_000
|
||||||
|
cpu_quota = int(host_cfg.get("CpuQuota") or 0)
|
||||||
|
cpu_period = int(host_cfg.get("CpuPeriod") or 0)
|
||||||
|
if cpu_quota > 0 and cpu_period > 0:
|
||||||
|
return cpu_quota / cpu_period
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
def _container_storage_matches(self, actual_storage_bytes: Optional[int], desired_storage_gb: int) -> bool:
|
||||||
|
expected_storage_bytes = self._desired_storage_bytes(desired_storage_gb)
|
||||||
|
if expected_storage_bytes is None:
|
||||||
|
return actual_storage_bytes in {None, 0}
|
||||||
|
if actual_storage_bytes == expected_storage_bytes:
|
||||||
|
return True
|
||||||
|
return actual_storage_bytes is None and self._storage_limit_supported is not True
|
||||||
|
|
||||||
|
def _container_matches_runtime(
|
||||||
|
self,
|
||||||
|
container: Any,
|
||||||
|
*,
|
||||||
|
image: str,
|
||||||
|
cpu_cores: float,
|
||||||
|
memory_mb: int,
|
||||||
|
storage_gb: int,
|
||||||
|
bot_workspace: str,
|
||||||
|
network_name: str,
|
||||||
|
) -> bool:
|
||||||
|
attrs = getattr(container, "attrs", {}) or {}
|
||||||
|
config = attrs.get("Config") or {}
|
||||||
|
host_cfg = attrs.get("HostConfig") or {}
|
||||||
|
current_image = str(config.get("Image") or "").strip()
|
||||||
|
if current_image != image:
|
||||||
|
return False
|
||||||
|
if not self._container_uses_expected_bootstrap(container):
|
||||||
|
return False
|
||||||
|
if not self._container_uses_network(container, network_name):
|
||||||
|
return False
|
||||||
|
if not self._container_has_mount(container, bot_workspace, "/root/.nanobot"):
|
||||||
|
return False
|
||||||
|
|
||||||
|
actual_memory_bytes = int(host_cfg.get("Memory") or 0)
|
||||||
|
if actual_memory_bytes != self._desired_memory_bytes(memory_mb):
|
||||||
|
return False
|
||||||
|
|
||||||
|
desired_cpu = float(cpu_cores or 0)
|
||||||
|
actual_cpu = self._get_container_cpu_cores(container)
|
||||||
|
if abs(actual_cpu - desired_cpu) > 0.01:
|
||||||
|
return False
|
||||||
|
|
||||||
|
storage_opt = host_cfg.get("StorageOpt") or {}
|
||||||
|
actual_storage_bytes = self._parse_size_to_bytes(storage_opt.get("size"))
|
||||||
|
if not self._container_storage_matches(actual_storage_bytes, storage_gb):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
def _run_container_with_storage_fallback(
|
def _run_container_with_storage_fallback(
|
||||||
self,
|
self,
|
||||||
bot_id: str,
|
bot_id: str,
|
||||||
|
|
@ -246,7 +379,10 @@ class BotDockerManager:
|
||||||
"detach": True,
|
"detach": True,
|
||||||
"stdin_open": True,
|
"stdin_open": True,
|
||||||
"tty": True,
|
"tty": True,
|
||||||
"environment": env_vars or {},
|
"entrypoint": self._runtime_bootstrap_entrypoint(),
|
||||||
|
"labels": {
|
||||||
|
self._RUNTIME_BOOTSTRAP_LABEL_KEY: self._RUNTIME_BOOTSTRAP_LABEL_VALUE,
|
||||||
|
},
|
||||||
"volumes": {
|
"volumes": {
|
||||||
bot_workspace: {"bind": "/root/.nanobot", "mode": "rw"},
|
bot_workspace: {"bind": "/root/.nanobot", "mode": "rw"},
|
||||||
},
|
},
|
||||||
|
|
@ -261,16 +397,32 @@ class BotDockerManager:
|
||||||
try:
|
try:
|
||||||
container = self.client.containers.get(container_name)
|
container = self.client.containers.get(container_name)
|
||||||
container.reload()
|
container.reload()
|
||||||
if container.status == "running" and self._container_uses_network(container, target_network):
|
if container.status in {"running", "restarting"} and self._container_uses_network(container, target_network):
|
||||||
if on_state_change:
|
if on_state_change:
|
||||||
self.ensure_monitor(bot_id, on_state_change)
|
self.ensure_monitor(bot_id, on_state_change)
|
||||||
return True
|
return True
|
||||||
if container.status == "running":
|
if container.status in {"running", "restarting"}:
|
||||||
print(
|
print(
|
||||||
f"[DockerManager] recreating {container_name} to switch network "
|
f"[DockerManager] recreating {container_name} to switch network "
|
||||||
f"from current attachment to '{target_network}'"
|
f"from current attachment to '{target_network}'"
|
||||||
)
|
)
|
||||||
container.remove(force=True)
|
container.remove(force=True)
|
||||||
|
elif self._container_matches_runtime(
|
||||||
|
container,
|
||||||
|
image=image,
|
||||||
|
cpu_cores=cpu,
|
||||||
|
memory_mb=memory,
|
||||||
|
storage_gb=storage,
|
||||||
|
bot_workspace=bot_workspace,
|
||||||
|
network_name=target_network,
|
||||||
|
):
|
||||||
|
container.start()
|
||||||
|
if on_state_change:
|
||||||
|
self.ensure_monitor(bot_id, on_state_change)
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
print(f"[DockerManager] recreating {container_name} because container config no longer matches desired runtime")
|
||||||
|
container.remove(force=True)
|
||||||
except docker.errors.NotFound:
|
except docker.errors.NotFound:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -322,13 +474,16 @@ class BotDockerManager:
|
||||||
print(f"[DockerManager] Error ensuring monitor for {bot_id}: {e}")
|
print(f"[DockerManager] Error ensuring monitor for {bot_id}: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def stop_bot(self, bot_id: str) -> bool:
|
def stop_bot(self, bot_id: str, remove: bool = False) -> bool:
|
||||||
if not self.client:
|
if not self.client:
|
||||||
return False
|
return False
|
||||||
container_name = f"worker_{bot_id}"
|
container_name = f"worker_{bot_id}"
|
||||||
try:
|
try:
|
||||||
container = self.client.containers.get(container_name)
|
container = self.client.containers.get(container_name)
|
||||||
|
container.reload()
|
||||||
|
if str(container.status or "").strip().lower() in {"running", "restarting", "paused"}:
|
||||||
container.stop(timeout=5)
|
container.stop(timeout=5)
|
||||||
|
if remove:
|
||||||
container.remove()
|
container.remove()
|
||||||
self.active_monitors.pop(bot_id, None)
|
self.active_monitors.pop(bot_id, None)
|
||||||
return True
|
return True
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,7 @@ def deactivate_bot_instance(session: Session, bot_id: str) -> Dict[str, Any]:
|
||||||
|
|
||||||
def delete_bot_instance(session: Session, bot_id: str, delete_workspace: bool = True) -> Dict[str, Any]:
|
def delete_bot_instance(session: Session, bot_id: str, delete_workspace: bool = True) -> Dict[str, Any]:
|
||||||
bot = _get_bot_or_404(session, bot_id)
|
bot = _get_bot_or_404(session, bot_id)
|
||||||
docker_manager.stop_bot(bot_id)
|
docker_manager.stop_bot(bot_id, remove=True)
|
||||||
|
|
||||||
messages = session.exec(select(BotMessage).where(BotMessage.bot_id == bot_id)).all()
|
messages = session.exec(select(BotMessage).where(BotMessage.bot_id == bot_id)).all()
|
||||||
for row in messages:
|
for row in messages:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,206 @@
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import types
|
||||||
|
import unittest
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
docker_stub = types.ModuleType("docker")
|
||||||
|
docker_stub.errors = types.SimpleNamespace(
|
||||||
|
ImageNotFound=type("ImageNotFound", (Exception,), {}),
|
||||||
|
NotFound=type("NotFound", (Exception,), {}),
|
||||||
|
)
|
||||||
|
sys.modules.setdefault("docker", docker_stub)
|
||||||
|
|
||||||
|
from core.docker_manager import BotDockerManager
|
||||||
|
|
||||||
|
|
||||||
|
class BotDockerManagerTests(unittest.TestCase):
|
||||||
|
def setUp(self) -> None:
|
||||||
|
self._tmpdir = tempfile.TemporaryDirectory()
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
self._tmpdir.cleanup()
|
||||||
|
|
||||||
|
def _make_manager(self) -> BotDockerManager:
|
||||||
|
manager = BotDockerManager.__new__(BotDockerManager)
|
||||||
|
manager.client = MagicMock()
|
||||||
|
manager.host_data_root = self._tmpdir.name
|
||||||
|
manager.base_image = "nanobot-base"
|
||||||
|
manager.network_name = ""
|
||||||
|
manager.active_monitors = {}
|
||||||
|
manager._last_delivery_error = {}
|
||||||
|
manager._storage_limit_supported = True
|
||||||
|
manager._storage_limit_warning_emitted = False
|
||||||
|
return manager
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_container(
|
||||||
|
*,
|
||||||
|
status: str,
|
||||||
|
image: str,
|
||||||
|
nano_cpus: int,
|
||||||
|
memory_bytes: int,
|
||||||
|
storage_opt_size: str,
|
||||||
|
source_mount: str,
|
||||||
|
network_name: str,
|
||||||
|
bootstrap_label: str | None = "env-json-v1",
|
||||||
|
) -> MagicMock:
|
||||||
|
container = MagicMock()
|
||||||
|
container.status = status
|
||||||
|
container.reload = MagicMock()
|
||||||
|
container.start = MagicMock()
|
||||||
|
container.stop = MagicMock()
|
||||||
|
container.remove = MagicMock()
|
||||||
|
container.attrs = {
|
||||||
|
"Config": {
|
||||||
|
"Image": image,
|
||||||
|
"Labels": (
|
||||||
|
{"dashboard.runtime_bootstrap": bootstrap_label}
|
||||||
|
if bootstrap_label is not None
|
||||||
|
else {}
|
||||||
|
),
|
||||||
|
},
|
||||||
|
"HostConfig": {
|
||||||
|
"NanoCpus": nano_cpus,
|
||||||
|
"Memory": memory_bytes,
|
||||||
|
"StorageOpt": {"size": storage_opt_size},
|
||||||
|
},
|
||||||
|
"Mounts": [
|
||||||
|
{
|
||||||
|
"Source": source_mount,
|
||||||
|
"Destination": "/root/.nanobot",
|
||||||
|
"RW": True,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"NetworkSettings": {
|
||||||
|
"Networks": {network_name: {"IPAddress": "172.18.0.2"}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return container
|
||||||
|
|
||||||
|
def test_stop_bot_keeps_container_by_default(self) -> None:
|
||||||
|
manager = self._make_manager()
|
||||||
|
container = MagicMock()
|
||||||
|
container.status = "running"
|
||||||
|
container.reload = MagicMock()
|
||||||
|
container.stop = MagicMock()
|
||||||
|
container.remove = MagicMock()
|
||||||
|
manager.client.containers.get.return_value = container
|
||||||
|
|
||||||
|
result = manager.stop_bot("demo")
|
||||||
|
|
||||||
|
self.assertTrue(result)
|
||||||
|
container.stop.assert_called_once_with(timeout=5)
|
||||||
|
container.remove.assert_not_called()
|
||||||
|
|
||||||
|
def test_stop_bot_remove_true_deletes_container(self) -> None:
|
||||||
|
manager = self._make_manager()
|
||||||
|
container = MagicMock()
|
||||||
|
container.status = "exited"
|
||||||
|
container.reload = MagicMock()
|
||||||
|
container.stop = MagicMock()
|
||||||
|
container.remove = MagicMock()
|
||||||
|
manager.client.containers.get.return_value = container
|
||||||
|
|
||||||
|
result = manager.stop_bot("demo", remove=True)
|
||||||
|
|
||||||
|
self.assertTrue(result)
|
||||||
|
container.stop.assert_not_called()
|
||||||
|
container.remove.assert_called_once_with()
|
||||||
|
|
||||||
|
def test_start_bot_reuses_compatible_stopped_container(self) -> None:
|
||||||
|
manager = self._make_manager()
|
||||||
|
image_tag = "nanobot-base:v1"
|
||||||
|
workspace_mount = f"{self._tmpdir.name}/demo/.nanobot"
|
||||||
|
container = self._build_container(
|
||||||
|
status="exited",
|
||||||
|
image=image_tag,
|
||||||
|
nano_cpus=1_000_000_000,
|
||||||
|
memory_bytes=1024 * 1024 * 1024,
|
||||||
|
storage_opt_size="10G",
|
||||||
|
source_mount=workspace_mount,
|
||||||
|
network_name="bridge",
|
||||||
|
)
|
||||||
|
manager.client.images.get.return_value = MagicMock()
|
||||||
|
manager.client.containers.get.return_value = container
|
||||||
|
|
||||||
|
result = manager.start_bot(
|
||||||
|
"demo",
|
||||||
|
image_tag=image_tag,
|
||||||
|
env_vars={"TZ": "UTC", "API_KEY": "updated-secret"},
|
||||||
|
cpu_cores=1.0,
|
||||||
|
memory_mb=1024,
|
||||||
|
storage_gb=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertTrue(result)
|
||||||
|
container.start.assert_called_once_with()
|
||||||
|
container.remove.assert_not_called()
|
||||||
|
manager.client.containers.run.assert_not_called()
|
||||||
|
|
||||||
|
def test_start_bot_recreates_incompatible_stopped_container(self) -> None:
|
||||||
|
manager = self._make_manager()
|
||||||
|
image_tag = "nanobot-base:v1"
|
||||||
|
workspace_mount = f"{self._tmpdir.name}/demo/.nanobot"
|
||||||
|
container = self._build_container(
|
||||||
|
status="exited",
|
||||||
|
image="nanobot-base:old",
|
||||||
|
nano_cpus=1_000_000_000,
|
||||||
|
memory_bytes=1024 * 1024 * 1024,
|
||||||
|
storage_opt_size="10G",
|
||||||
|
source_mount=workspace_mount,
|
||||||
|
network_name="bridge",
|
||||||
|
)
|
||||||
|
manager.client.images.get.return_value = MagicMock()
|
||||||
|
manager.client.containers.get.return_value = container
|
||||||
|
manager._run_container_with_storage_fallback = MagicMock(return_value=MagicMock())
|
||||||
|
|
||||||
|
result = manager.start_bot(
|
||||||
|
"demo",
|
||||||
|
image_tag=image_tag,
|
||||||
|
env_vars={"TZ": "Asia/Shanghai"},
|
||||||
|
cpu_cores=1.0,
|
||||||
|
memory_mb=1024,
|
||||||
|
storage_gb=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertTrue(result)
|
||||||
|
container.start.assert_not_called()
|
||||||
|
container.remove.assert_called_once_with(force=True)
|
||||||
|
manager._run_container_with_storage_fallback.assert_called_once()
|
||||||
|
|
||||||
|
def test_start_bot_recreates_container_without_new_entrypoint(self) -> None:
|
||||||
|
manager = self._make_manager()
|
||||||
|
image_tag = "nanobot-base:v1"
|
||||||
|
workspace_mount = f"{self._tmpdir.name}/demo/.nanobot"
|
||||||
|
container = self._build_container(
|
||||||
|
status="exited",
|
||||||
|
image=image_tag,
|
||||||
|
nano_cpus=1_000_000_000,
|
||||||
|
memory_bytes=1024 * 1024 * 1024,
|
||||||
|
storage_opt_size="10G",
|
||||||
|
source_mount=workspace_mount,
|
||||||
|
network_name="bridge",
|
||||||
|
bootstrap_label=None,
|
||||||
|
)
|
||||||
|
manager.client.images.get.return_value = MagicMock()
|
||||||
|
manager.client.containers.get.return_value = container
|
||||||
|
manager._run_container_with_storage_fallback = MagicMock(return_value=MagicMock())
|
||||||
|
|
||||||
|
result = manager.start_bot(
|
||||||
|
"demo",
|
||||||
|
image_tag=image_tag,
|
||||||
|
env_vars={"TZ": "Asia/Shanghai"},
|
||||||
|
cpu_cores=1.0,
|
||||||
|
memory_mb=1024,
|
||||||
|
storage_gb=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertTrue(result)
|
||||||
|
container.start.assert_not_called()
|
||||||
|
container.remove.assert_called_once_with(force=True)
|
||||||
|
manager._run_container_with_storage_fallback.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Loading…
Reference in New Issue