v0.1.5
parent
ad2af1e71f
commit
aeaaa4fde5
|
|
@ -11,6 +11,9 @@ import docker
|
|||
|
||||
|
||||
class BotDockerManager:
|
||||
_RUNTIME_BOOTSTRAP_LABEL_KEY = "dashboard.runtime_bootstrap"
|
||||
_RUNTIME_BOOTSTRAP_LABEL_VALUE = "env-json-v1"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
host_data_root: str,
|
||||
|
|
@ -180,6 +183,136 @@ class BotDockerManager:
|
|||
|
||||
return str(network_settings.get("IPAddress") or "").strip()
|
||||
|
||||
@classmethod
|
||||
def _container_uses_expected_bootstrap(cls, container: Any) -> bool:
|
||||
attrs = getattr(container, "attrs", {}) or {}
|
||||
config = attrs.get("Config") or {}
|
||||
labels = config.get("Labels") or {}
|
||||
return str(labels.get(cls._RUNTIME_BOOTSTRAP_LABEL_KEY) or "").strip() == cls._RUNTIME_BOOTSTRAP_LABEL_VALUE
|
||||
|
||||
@staticmethod
|
||||
def _runtime_bootstrap_entrypoint() -> List[str]:
|
||||
bootstrap_code = "\n".join(
|
||||
[
|
||||
"import json",
|
||||
"import os",
|
||||
"import pathlib",
|
||||
"import re",
|
||||
"",
|
||||
"path = pathlib.Path('/root/.nanobot/env.json')",
|
||||
"pattern = re.compile(r'^[A-Z_][A-Z0-9_]{0,127}$')",
|
||||
"data = {}",
|
||||
"if path.is_file():",
|
||||
" try:",
|
||||
" data = json.loads(path.read_text(encoding='utf-8'))",
|
||||
" except Exception:",
|
||||
" data = {}",
|
||||
"if not isinstance(data, dict):",
|
||||
" data = {}",
|
||||
"for raw_key, raw_value in data.items():",
|
||||
" key = str(raw_key or '').strip().upper()",
|
||||
" if not pattern.fullmatch(key):",
|
||||
" continue",
|
||||
" os.environ[key] = str(raw_value or '').strip()",
|
||||
"os.execvp('nanobot', ['nanobot', 'gateway'])",
|
||||
]
|
||||
)
|
||||
return [
|
||||
"python",
|
||||
"-c",
|
||||
bootstrap_code,
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _container_has_mount(container: Any, source: str, destination: str) -> bool:
|
||||
attrs = getattr(container, "attrs", {}) or {}
|
||||
mounts = attrs.get("Mounts") or []
|
||||
expected_source = os.path.normpath(source)
|
||||
expected_destination = str(destination or "").strip()
|
||||
for mount in mounts:
|
||||
if not isinstance(mount, dict):
|
||||
continue
|
||||
current_source = os.path.normpath(str(mount.get("Source") or ""))
|
||||
current_destination = str(mount.get("Destination") or "").strip()
|
||||
if current_source != expected_source or current_destination != expected_destination:
|
||||
continue
|
||||
if mount.get("RW") is False:
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _desired_memory_bytes(memory_mb: int) -> int:
|
||||
return int(memory_mb) * 1024 * 1024 if int(memory_mb or 0) > 0 else 0
|
||||
|
||||
@staticmethod
|
||||
def _desired_storage_bytes(storage_gb: int) -> Optional[int]:
|
||||
storage = int(storage_gb or 0)
|
||||
if storage <= 0:
|
||||
return None
|
||||
return storage * 1024 * 1024 * 1024
|
||||
|
||||
@staticmethod
|
||||
def _get_container_cpu_cores(container: Any) -> float:
|
||||
attrs = getattr(container, "attrs", {}) or {}
|
||||
host_cfg = attrs.get("HostConfig") or {}
|
||||
nano_cpus = int(host_cfg.get("NanoCpus") or 0)
|
||||
if nano_cpus > 0:
|
||||
return nano_cpus / 1_000_000_000
|
||||
cpu_quota = int(host_cfg.get("CpuQuota") or 0)
|
||||
cpu_period = int(host_cfg.get("CpuPeriod") or 0)
|
||||
if cpu_quota > 0 and cpu_period > 0:
|
||||
return cpu_quota / cpu_period
|
||||
return 0.0
|
||||
|
||||
def _container_storage_matches(self, actual_storage_bytes: Optional[int], desired_storage_gb: int) -> bool:
|
||||
expected_storage_bytes = self._desired_storage_bytes(desired_storage_gb)
|
||||
if expected_storage_bytes is None:
|
||||
return actual_storage_bytes in {None, 0}
|
||||
if actual_storage_bytes == expected_storage_bytes:
|
||||
return True
|
||||
return actual_storage_bytes is None and self._storage_limit_supported is not True
|
||||
|
||||
def _container_matches_runtime(
|
||||
self,
|
||||
container: Any,
|
||||
*,
|
||||
image: str,
|
||||
cpu_cores: float,
|
||||
memory_mb: int,
|
||||
storage_gb: int,
|
||||
bot_workspace: str,
|
||||
network_name: str,
|
||||
) -> bool:
|
||||
attrs = getattr(container, "attrs", {}) or {}
|
||||
config = attrs.get("Config") or {}
|
||||
host_cfg = attrs.get("HostConfig") or {}
|
||||
current_image = str(config.get("Image") or "").strip()
|
||||
if current_image != image:
|
||||
return False
|
||||
if not self._container_uses_expected_bootstrap(container):
|
||||
return False
|
||||
if not self._container_uses_network(container, network_name):
|
||||
return False
|
||||
if not self._container_has_mount(container, bot_workspace, "/root/.nanobot"):
|
||||
return False
|
||||
|
||||
actual_memory_bytes = int(host_cfg.get("Memory") or 0)
|
||||
if actual_memory_bytes != self._desired_memory_bytes(memory_mb):
|
||||
return False
|
||||
|
||||
desired_cpu = float(cpu_cores or 0)
|
||||
actual_cpu = self._get_container_cpu_cores(container)
|
||||
if abs(actual_cpu - desired_cpu) > 0.01:
|
||||
return False
|
||||
|
||||
storage_opt = host_cfg.get("StorageOpt") or {}
|
||||
actual_storage_bytes = self._parse_size_to_bytes(storage_opt.get("size"))
|
||||
if not self._container_storage_matches(actual_storage_bytes, storage_gb):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _run_container_with_storage_fallback(
|
||||
self,
|
||||
bot_id: str,
|
||||
|
|
@ -246,7 +379,10 @@ class BotDockerManager:
|
|||
"detach": True,
|
||||
"stdin_open": True,
|
||||
"tty": True,
|
||||
"environment": env_vars or {},
|
||||
"entrypoint": self._runtime_bootstrap_entrypoint(),
|
||||
"labels": {
|
||||
self._RUNTIME_BOOTSTRAP_LABEL_KEY: self._RUNTIME_BOOTSTRAP_LABEL_VALUE,
|
||||
},
|
||||
"volumes": {
|
||||
bot_workspace: {"bind": "/root/.nanobot", "mode": "rw"},
|
||||
},
|
||||
|
|
@ -261,16 +397,32 @@ class BotDockerManager:
|
|||
try:
|
||||
container = self.client.containers.get(container_name)
|
||||
container.reload()
|
||||
if container.status == "running" and self._container_uses_network(container, target_network):
|
||||
if container.status in {"running", "restarting"} and self._container_uses_network(container, target_network):
|
||||
if on_state_change:
|
||||
self.ensure_monitor(bot_id, on_state_change)
|
||||
return True
|
||||
if container.status == "running":
|
||||
if container.status in {"running", "restarting"}:
|
||||
print(
|
||||
f"[DockerManager] recreating {container_name} to switch network "
|
||||
f"from current attachment to '{target_network}'"
|
||||
)
|
||||
container.remove(force=True)
|
||||
elif self._container_matches_runtime(
|
||||
container,
|
||||
image=image,
|
||||
cpu_cores=cpu,
|
||||
memory_mb=memory,
|
||||
storage_gb=storage,
|
||||
bot_workspace=bot_workspace,
|
||||
network_name=target_network,
|
||||
):
|
||||
container.start()
|
||||
if on_state_change:
|
||||
self.ensure_monitor(bot_id, on_state_change)
|
||||
return True
|
||||
else:
|
||||
print(f"[DockerManager] recreating {container_name} because container config no longer matches desired runtime")
|
||||
container.remove(force=True)
|
||||
except docker.errors.NotFound:
|
||||
pass
|
||||
|
||||
|
|
@ -322,13 +474,16 @@ class BotDockerManager:
|
|||
print(f"[DockerManager] Error ensuring monitor for {bot_id}: {e}")
|
||||
return False
|
||||
|
||||
def stop_bot(self, bot_id: str) -> bool:
|
||||
def stop_bot(self, bot_id: str, remove: bool = False) -> bool:
|
||||
if not self.client:
|
||||
return False
|
||||
container_name = f"worker_{bot_id}"
|
||||
try:
|
||||
container = self.client.containers.get(container_name)
|
||||
container.reload()
|
||||
if str(container.status or "").strip().lower() in {"running", "restarting", "paused"}:
|
||||
container.stop(timeout=5)
|
||||
if remove:
|
||||
container.remove()
|
||||
self.active_monitors.pop(bot_id, None)
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ def deactivate_bot_instance(session: Session, bot_id: str) -> Dict[str, Any]:
|
|||
|
||||
def delete_bot_instance(session: Session, bot_id: str, delete_workspace: bool = True) -> Dict[str, Any]:
|
||||
bot = _get_bot_or_404(session, bot_id)
|
||||
docker_manager.stop_bot(bot_id)
|
||||
docker_manager.stop_bot(bot_id, remove=True)
|
||||
|
||||
messages = session.exec(select(BotMessage).where(BotMessage.bot_id == bot_id)).all()
|
||||
for row in messages:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,206 @@
|
|||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import unittest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
docker_stub = types.ModuleType("docker")
|
||||
docker_stub.errors = types.SimpleNamespace(
|
||||
ImageNotFound=type("ImageNotFound", (Exception,), {}),
|
||||
NotFound=type("NotFound", (Exception,), {}),
|
||||
)
|
||||
sys.modules.setdefault("docker", docker_stub)
|
||||
|
||||
from core.docker_manager import BotDockerManager
|
||||
|
||||
|
||||
class BotDockerManagerTests(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
self._tmpdir = tempfile.TemporaryDirectory()
|
||||
|
||||
def tearDown(self) -> None:
|
||||
self._tmpdir.cleanup()
|
||||
|
||||
def _make_manager(self) -> BotDockerManager:
|
||||
manager = BotDockerManager.__new__(BotDockerManager)
|
||||
manager.client = MagicMock()
|
||||
manager.host_data_root = self._tmpdir.name
|
||||
manager.base_image = "nanobot-base"
|
||||
manager.network_name = ""
|
||||
manager.active_monitors = {}
|
||||
manager._last_delivery_error = {}
|
||||
manager._storage_limit_supported = True
|
||||
manager._storage_limit_warning_emitted = False
|
||||
return manager
|
||||
|
||||
@staticmethod
|
||||
def _build_container(
|
||||
*,
|
||||
status: str,
|
||||
image: str,
|
||||
nano_cpus: int,
|
||||
memory_bytes: int,
|
||||
storage_opt_size: str,
|
||||
source_mount: str,
|
||||
network_name: str,
|
||||
bootstrap_label: str | None = "env-json-v1",
|
||||
) -> MagicMock:
|
||||
container = MagicMock()
|
||||
container.status = status
|
||||
container.reload = MagicMock()
|
||||
container.start = MagicMock()
|
||||
container.stop = MagicMock()
|
||||
container.remove = MagicMock()
|
||||
container.attrs = {
|
||||
"Config": {
|
||||
"Image": image,
|
||||
"Labels": (
|
||||
{"dashboard.runtime_bootstrap": bootstrap_label}
|
||||
if bootstrap_label is not None
|
||||
else {}
|
||||
),
|
||||
},
|
||||
"HostConfig": {
|
||||
"NanoCpus": nano_cpus,
|
||||
"Memory": memory_bytes,
|
||||
"StorageOpt": {"size": storage_opt_size},
|
||||
},
|
||||
"Mounts": [
|
||||
{
|
||||
"Source": source_mount,
|
||||
"Destination": "/root/.nanobot",
|
||||
"RW": True,
|
||||
}
|
||||
],
|
||||
"NetworkSettings": {
|
||||
"Networks": {network_name: {"IPAddress": "172.18.0.2"}},
|
||||
},
|
||||
}
|
||||
return container
|
||||
|
||||
def test_stop_bot_keeps_container_by_default(self) -> None:
|
||||
manager = self._make_manager()
|
||||
container = MagicMock()
|
||||
container.status = "running"
|
||||
container.reload = MagicMock()
|
||||
container.stop = MagicMock()
|
||||
container.remove = MagicMock()
|
||||
manager.client.containers.get.return_value = container
|
||||
|
||||
result = manager.stop_bot("demo")
|
||||
|
||||
self.assertTrue(result)
|
||||
container.stop.assert_called_once_with(timeout=5)
|
||||
container.remove.assert_not_called()
|
||||
|
||||
def test_stop_bot_remove_true_deletes_container(self) -> None:
|
||||
manager = self._make_manager()
|
||||
container = MagicMock()
|
||||
container.status = "exited"
|
||||
container.reload = MagicMock()
|
||||
container.stop = MagicMock()
|
||||
container.remove = MagicMock()
|
||||
manager.client.containers.get.return_value = container
|
||||
|
||||
result = manager.stop_bot("demo", remove=True)
|
||||
|
||||
self.assertTrue(result)
|
||||
container.stop.assert_not_called()
|
||||
container.remove.assert_called_once_with()
|
||||
|
||||
def test_start_bot_reuses_compatible_stopped_container(self) -> None:
|
||||
manager = self._make_manager()
|
||||
image_tag = "nanobot-base:v1"
|
||||
workspace_mount = f"{self._tmpdir.name}/demo/.nanobot"
|
||||
container = self._build_container(
|
||||
status="exited",
|
||||
image=image_tag,
|
||||
nano_cpus=1_000_000_000,
|
||||
memory_bytes=1024 * 1024 * 1024,
|
||||
storage_opt_size="10G",
|
||||
source_mount=workspace_mount,
|
||||
network_name="bridge",
|
||||
)
|
||||
manager.client.images.get.return_value = MagicMock()
|
||||
manager.client.containers.get.return_value = container
|
||||
|
||||
result = manager.start_bot(
|
||||
"demo",
|
||||
image_tag=image_tag,
|
||||
env_vars={"TZ": "UTC", "API_KEY": "updated-secret"},
|
||||
cpu_cores=1.0,
|
||||
memory_mb=1024,
|
||||
storage_gb=10,
|
||||
)
|
||||
|
||||
self.assertTrue(result)
|
||||
container.start.assert_called_once_with()
|
||||
container.remove.assert_not_called()
|
||||
manager.client.containers.run.assert_not_called()
|
||||
|
||||
def test_start_bot_recreates_incompatible_stopped_container(self) -> None:
|
||||
manager = self._make_manager()
|
||||
image_tag = "nanobot-base:v1"
|
||||
workspace_mount = f"{self._tmpdir.name}/demo/.nanobot"
|
||||
container = self._build_container(
|
||||
status="exited",
|
||||
image="nanobot-base:old",
|
||||
nano_cpus=1_000_000_000,
|
||||
memory_bytes=1024 * 1024 * 1024,
|
||||
storage_opt_size="10G",
|
||||
source_mount=workspace_mount,
|
||||
network_name="bridge",
|
||||
)
|
||||
manager.client.images.get.return_value = MagicMock()
|
||||
manager.client.containers.get.return_value = container
|
||||
manager._run_container_with_storage_fallback = MagicMock(return_value=MagicMock())
|
||||
|
||||
result = manager.start_bot(
|
||||
"demo",
|
||||
image_tag=image_tag,
|
||||
env_vars={"TZ": "Asia/Shanghai"},
|
||||
cpu_cores=1.0,
|
||||
memory_mb=1024,
|
||||
storage_gb=10,
|
||||
)
|
||||
|
||||
self.assertTrue(result)
|
||||
container.start.assert_not_called()
|
||||
container.remove.assert_called_once_with(force=True)
|
||||
manager._run_container_with_storage_fallback.assert_called_once()
|
||||
|
||||
def test_start_bot_recreates_container_without_new_entrypoint(self) -> None:
|
||||
manager = self._make_manager()
|
||||
image_tag = "nanobot-base:v1"
|
||||
workspace_mount = f"{self._tmpdir.name}/demo/.nanobot"
|
||||
container = self._build_container(
|
||||
status="exited",
|
||||
image=image_tag,
|
||||
nano_cpus=1_000_000_000,
|
||||
memory_bytes=1024 * 1024 * 1024,
|
||||
storage_opt_size="10G",
|
||||
source_mount=workspace_mount,
|
||||
network_name="bridge",
|
||||
bootstrap_label=None,
|
||||
)
|
||||
manager.client.images.get.return_value = MagicMock()
|
||||
manager.client.containers.get.return_value = container
|
||||
manager._run_container_with_storage_fallback = MagicMock(return_value=MagicMock())
|
||||
|
||||
result = manager.start_bot(
|
||||
"demo",
|
||||
image_tag=image_tag,
|
||||
env_vars={"TZ": "Asia/Shanghai"},
|
||||
cpu_cores=1.0,
|
||||
memory_mb=1024,
|
||||
storage_gb=10,
|
||||
)
|
||||
|
||||
self.assertTrue(result)
|
||||
container.start.assert_not_called()
|
||||
container.remove.assert_called_once_with(force=True)
|
||||
manager._run_container_with_storage_fallback.assert_called_once()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Loading…
Reference in New Issue