783 lines
31 KiB
Python
783 lines
31 KiB
Python
import codecs
|
|
import csv
|
|
import hashlib
|
|
import json
|
|
import signal
|
|
import socket
|
|
import os
|
|
import re
|
|
import shlex
|
|
import shutil
|
|
import subprocess
|
|
import threading
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Callable, Dict, List, Optional
|
|
|
|
import httpx
|
|
import psutil
|
|
|
|
from app.core.settings import (
|
|
EDGE_BOTS_WORKSPACE_ROOT,
|
|
EDGE_NATIVE_COMMAND,
|
|
EDGE_NATIVE_DASHBOARD_URL,
|
|
EDGE_NATIVE_WORKDIR,
|
|
)
|
|
from app.runtime.base import EdgeRuntimeBackend
|
|
|
|
|
|
@dataclass
class _NativeProcessRecord:
    """Book-keeping for one bot running as a local (native) subprocess."""

    # The launched gateway process (text mode stdout piped to us).
    process: subprocess.Popen[str]
    # Exact argv used to launch the process.
    command: List[str]
    # Working directory the process was started in.
    cwd: str
    # Absolute path of the append-only native log file.
    log_path: str
    # Open file handle for log_path; closed by _cleanup_record.
    log_handle: Any
    # Full dashboard endpoint URL used for message delivery.
    dashboard_url: str
    # Host/port backing dashboard_url; used for readiness probing.
    dashboard_host: str
    dashboard_port: int
    # Requested resource limits, kept for reporting only (not enforced natively).
    cpu_cores: Optional[float]
    memory_mb: Optional[int]
    storage_gb: Optional[int]
    # Set to ask the stdout drain thread to stop.
    stop_event: threading.Event = field(default_factory=threading.Event)
    # Daemon thread draining process stdout into the log file.
    stdout_thread: Optional[threading.Thread] = None
    # Last delivery/runtime error text for this bot ("" when none).
    last_error: str = ""
|
|
|
|
|
|
class EdgeNativeRuntimeBackend(EdgeRuntimeBackend):
    """Edge runtime backend that runs bots as local subprocesses (no Docker).

    Bots are launched with the configured native command, monitored by
    draining their stdout, and messaged over a local HTTP dashboard endpoint.
    """

    # Discriminator reported through capabilities() / runtime selection.
    runtime_kind = "native"
|
|
|
|
    def __init__(self) -> None:
        """Initialize backend state from the configured native command."""
        # Parsed argv of the configured launcher command.
        self._command = shlex.split(EDGE_NATIVE_COMMAND)
        # Native mode is usable only when the launcher executable is on PATH.
        self._native_available = bool(self._command and shutil.which(self._command[0]))
        # bot_id -> last error text (kept even after a record is dropped).
        self._last_errors: Dict[str, str] = {}
        # bot_id -> live process record.
        self._records: Dict[str, _NativeProcessRecord] = {}
        # Guards _records mutation across start/stop calls (reentrant).
        self._lock = threading.RLock()
|
|
|
|
def capabilities(self) -> Dict[str, Any]:
|
|
available = bool(self._native_available)
|
|
return {
|
|
"protocol": {"version": "1"},
|
|
"runtime": {"docker": False, "native": available},
|
|
"workspace": {
|
|
"tree": True,
|
|
"read_file": True,
|
|
"write_markdown": True,
|
|
"upload_files": True,
|
|
"serve_file": True,
|
|
},
|
|
"monitor": {"logs": available, "ensure": available},
|
|
"process": {"command": list(self._command), "available": available},
|
|
}
|
|
|
|
def has_image(self, tag: str) -> bool:
|
|
return False
|
|
|
|
    def start_bot(
        self,
        bot_id: str,
        image_tag: Optional[str] = None,  # accepted for Docker-backend parity; unused here
        env_vars: Optional[Dict[str, str]] = None,
        workspace_root: Optional[str] = None,
        native_command: Optional[str] = None,
        native_workdir: Optional[str] = None,
        cpu_cores: Optional[float] = None,
        memory_mb: Optional[int] = None,
        storage_gb: Optional[int] = None,
        on_state_change: Optional[Callable[[str, dict], None]] = None,
    ) -> bool:
        """Launch the bot as a local subprocess and wait for its dashboard.

        Returns True when the process is running and its dashboard TCP
        endpoint accepted a connection; False otherwise (the reason is
        recorded via _set_last_error). Resource limits are stored on the
        record for reporting only — nothing enforces them natively.
        """
        bot_id = str(bot_id or "").strip()
        if not bot_id:
            return False
        effective_env = dict(env_vars or {})
        # Launcher argv precedence: explicit arg > env override > settings.
        launch_command = self._resolve_launch_command(native_command=native_command, env_vars=effective_env)
        if not self._is_launch_command_available(launch_command):
            self._set_last_error(bot_id, f"native command not available: {self._render_command(launch_command) or 'nanobot gateway'}")
            return False

        with self._lock:
            # Reuse an already-running process instead of double-starting.
            existing = self._records.get(bot_id)
            if existing and existing.process.poll() is None:
                if on_state_change:
                    self.ensure_monitor(bot_id, on_state_change)
                return True
            if existing:
                # Stale record for a dead process: release its resources.
                self._cleanup_record(bot_id, existing)

            state_root = self._bot_root(bot_id)
            workspace_dir = self._workspace_dir(bot_id=bot_id, workspace_root=workspace_root)
            config_path = self._config_path(bot_id, workspace_root=workspace_root)
            runtime_dir = os.path.join(os.path.dirname(config_path), "runtime")
            os.makedirs(runtime_dir, exist_ok=True)
            os.makedirs(workspace_dir, exist_ok=True)
            log_path = os.path.join(runtime_dir, "native.log")
            cwd = self._resolve_workdir(state_root, native_workdir=native_workdir, env_vars=effective_env)
            dashboard_host, dashboard_port, dashboard_url = self._resolve_dashboard_endpoint(bot_id, effective_env)
            # Child environment: inherit ours, overlay caller vars, then fill
            # gateway defaults (setdefault keeps caller overrides winning).
            env = os.environ.copy()
            env.update({str(k): str(v) for k, v in effective_env.items() if str(k).strip()})
            env.setdefault("PYTHONUNBUFFERED", "1")
            env.setdefault("EDGE_RUNTIME_KIND", "native")
            env.setdefault("EDGE_NODE_MODE", "native")
            env.setdefault("NANOBOT_BOT_ID", bot_id)
            env.setdefault("DASHBOARD_HOST", dashboard_host)
            env.setdefault("DASHBOARD_PORT", str(dashboard_port))
            env.setdefault("DASHBOARD_URL", dashboard_url)
            env.setdefault("NANOBOT_CONFIG", config_path)
            env.setdefault("NANOBOT_WORKSPACE", workspace_dir)

            if not os.path.isfile(config_path):
                self._set_last_error(bot_id, f"native config not found: {config_path}")
                return False

            # Kill gateway processes left over from a previous run that still
            # reference this bot's config file.
            self._terminate_orphan_processes(bot_id=bot_id, config_path=config_path)

            log_handle = open(log_path, "a", encoding="utf-8")
            command = self._build_launch_command(base_command=launch_command, config_path=config_path, workspace_dir=workspace_dir)
            log_handle.write(
                f"[{self._now()}] native bootstrap command={shlex.join(command)} cwd={cwd} config={config_path} workspace={workspace_dir} dashboard={dashboard_url}\n"
            )
            log_handle.flush()
            try:
                # start_new_session detaches the child into its own process
                # group so signals sent to us don't hit the gateway directly.
                process = subprocess.Popen(
                    command,
                    cwd=cwd,
                    env=env,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                    bufsize=1,  # line buffered for the stdout drain thread
                    start_new_session=True,
                )
            except FileNotFoundError as exc:
                log_handle.write(f"[{self._now()}] native bootstrap failed: {exc}\n")
                log_handle.flush()
                log_handle.close()
                self._set_last_error(bot_id, f"native command not found: {exc}")
                return False
            except Exception as exc:
                log_handle.write(f"[{self._now()}] native bootstrap failed: {exc}\n")
                log_handle.flush()
                log_handle.close()
                self._set_last_error(bot_id, f"native start failed: {exc}")
                return False

            record = _NativeProcessRecord(
                process=process,
                command=command,
                cwd=cwd,
                log_path=log_path,
                log_handle=log_handle,
                dashboard_url=dashboard_url,
                dashboard_host=dashboard_host,
                dashboard_port=dashboard_port,
                cpu_cores=cpu_cores,
                memory_mb=memory_mb,
                storage_gb=storage_gb,
            )
            self._records[bot_id] = record
            # Drain stdout on a daemon thread: mirrors output into the log
            # file and forwards parsed monitor packets to the callback.
            record.stdout_thread = threading.Thread(
                target=self._drain_stdout,
                args=(bot_id, record, on_state_change),
                daemon=True,
            )
            record.stdout_thread.start()
            if not self._wait_for_dashboard_ready(record):
                # Startup failed: best-effort terminate, then drop the record.
                self._set_last_error(bot_id, f"native dashboard did not become ready: {dashboard_url}")
                try:
                    if process.poll() is None:
                        process.terminate()
                        process.wait(timeout=5)
                except Exception:
                    pass
                self._cleanup_record(bot_id, record)
                self._records.pop(bot_id, None)
                return False
            self._set_last_error(bot_id, "")
            return True
|
|
|
|
def ensure_monitor(self, bot_id: str, on_state_change: Callable[[str, dict], None]) -> bool:
|
|
record = self._records.get(bot_id)
|
|
if record is None or record.process.poll() is not None:
|
|
return False
|
|
thread = record.stdout_thread
|
|
if thread is not None and thread.is_alive():
|
|
return True
|
|
record.stdout_thread = threading.Thread(
|
|
target=self._drain_stdout,
|
|
args=(bot_id, record, on_state_change),
|
|
daemon=True,
|
|
)
|
|
record.stdout_thread.start()
|
|
return True
|
|
|
|
    def stop_bot(self, bot_id: str) -> bool:
        """Stop the tracked process for a bot, then sweep orphans.

        Terminate gracefully (8s), escalate to kill (5s) on timeout, and
        always release the record's resources. Returns True when either the
        tracked process or at least one orphan gateway was stopped.
        """
        bot_id = str(bot_id or "").strip()
        with self._lock:
            record = self._records.pop(bot_id, None)
            stopped = False
            if record is not None:
                try:
                    if record.process.poll() is None:
                        record.stop_event.set()
                        record.process.terminate()
                        try:
                            record.process.wait(timeout=8)
                        except Exception:
                            # Graceful stop timed out (or wait failed): force kill.
                            record.process.kill()
                            record.process.wait(timeout=5)
                    self._cleanup_record(bot_id, record)
                    stopped = True
                except Exception as exc:
                    self._set_last_error(bot_id, f"native stop failed: {exc}")
                    self._cleanup_record(bot_id, record)
                    return False
            # Also sweep gateway processes we lost track of (e.g. after restart).
            orphan_stopped = self._terminate_orphan_processes(bot_id=bot_id, config_path=self._config_path(bot_id))
            return bool(stopped or orphan_stopped)
|
|
|
|
def get_bot_status(self, bot_id: str) -> str:
|
|
normalized_bot_id = str(bot_id or "").strip()
|
|
record = self._records.get(normalized_bot_id)
|
|
if record is None:
|
|
return "RUNNING" if self._has_orphan_process(normalized_bot_id) else "STOPPED"
|
|
try:
|
|
return "RUNNING" if record.process.poll() is None else "STOPPED"
|
|
except Exception:
|
|
return "STOPPED"
|
|
|
|
    def get_bot_resource_snapshot(self, bot_id: str) -> Dict[str, Any]:
        """Report configured limits plus best-effort live usage via psutil.

        The shape mirrors the Docker backend's snapshot; fields with no
        native equivalent (network/blkio counters, nano_cpus, storage_opt)
        stay at their zero defaults.
        """
        bot_id = str(bot_id or "").strip()
        record = self._records.get(bot_id)
        snapshot: Dict[str, Any] = {
            "docker_status": self.get_bot_status(bot_id),
            "limits": {
                "cpu_cores": self._normalize_cpu_limit(record.cpu_cores if record else None),
                "memory_bytes": self._normalize_memory_limit(record.memory_mb if record else None),
                "storage_bytes": self._normalize_storage_limit(record.storage_gb if record else None),
                "nano_cpus": 0,
                "storage_opt_raw": "",
            },
            "usage": {
                "cpu_percent": 0.0,
                "memory_bytes": 0,
                "memory_limit_bytes": 0,
                "memory_percent": 0.0,
                "network_rx_bytes": 0,
                "network_tx_bytes": 0,
                "blk_read_bytes": 0,
                "blk_write_bytes": 0,
                "pids": 0,
                "container_rw_bytes": 0,
            },
        }
        # No live process: limits only, usage stays zeroed.
        if record is None or record.process.poll() is not None:
            return snapshot
        try:
            proc = psutil.Process(record.process.pid)
            # NOTE(review): psutil's cpu_percent(interval=None) returns 0.0 on
            # the first call for a Process instance — confirm callers poll
            # repeatedly if they rely on this value.
            cpu_percent = float(proc.cpu_percent(interval=None) or 0.0)
            memory_info = proc.memory_info()
            memory_bytes = int(getattr(memory_info, "rss", 0) or 0)
            # Whole-machine RAM total stands in for a per-process limit.
            memory_limit = int(psutil.virtual_memory().total or 0)
            memory_percent = float(proc.memory_percent() or 0.0)
            children = proc.children(recursive=True)
            workspace_used = self._calc_workspace_used_bytes(bot_id)
            snapshot["usage"].update(
                {
                    "cpu_percent": round(cpu_percent, 2),
                    "memory_bytes": memory_bytes,
                    "memory_limit_bytes": memory_limit,
                    "memory_percent": round(memory_percent, 2),
                    "network_rx_bytes": 0,
                    "network_tx_bytes": 0,
                    "blk_read_bytes": 0,
                    "blk_write_bytes": 0,
                    # Main process plus all (recursive) children.
                    "pids": 1 + len(children),
                    "container_rw_bytes": workspace_used,
                }
            )
        except Exception:
            # Process vanished mid-inspection; still report disk usage.
            workspace_used = self._calc_workspace_used_bytes(bot_id)
            snapshot["usage"]["container_rw_bytes"] = workspace_used
        return snapshot
|
|
|
|
def get_recent_logs(self, bot_id: str, tail: int = 300) -> List[str]:
|
|
log_path = self._log_path(str(bot_id or "").strip())
|
|
if not os.path.isfile(log_path):
|
|
return []
|
|
try:
|
|
with open(log_path, "r", encoding="utf-8", errors="ignore") as fh:
|
|
rows = [line.rstrip("\n") for line in fh.readlines() if line.strip()]
|
|
if tail > 0:
|
|
return rows[-int(tail) :]
|
|
return rows
|
|
except Exception:
|
|
return []
|
|
|
|
    def send_command(self, bot_id: str, command: str, media: Optional[List[str]] = None) -> bool:
        """Deliver a chat message (plus optional media paths) to the bot.

        Posts JSON to the bot's dashboard URL. Returns True on HTTP 200;
        records a delivery error and returns False otherwise.
        """
        bot_id = str(bot_id or "").strip()
        record = self._records.get(bot_id)
        if record is None or record.process.poll() is not None:
            self._set_last_error(bot_id, "native process is not running")
            return False
        try:
            payload = {"message": command, "media": list(media or [])}
            # trust_env=False: ignore proxy env vars for this loopback call.
            with httpx.Client(timeout=5.0, trust_env=False) as client:
                resp = client.post(record.dashboard_url, json=payload)
            if resp.status_code == 200:
                self._set_last_error(bot_id, "")
                return True
            # Keep only the first 300 chars of the body in the error record.
            self._set_last_error(bot_id, f"native dashboard returned {resp.status_code}: {resp.text[:300]}")
            return False
        except Exception as exc:
            self._set_last_error(bot_id, f"native dashboard request failed: {exc}")
            return False
|
|
|
|
def get_last_delivery_error(self, bot_id: str) -> str:
|
|
bot_id = str(bot_id or "").strip()
|
|
record = self._records.get(bot_id)
|
|
if record is None:
|
|
return str(self._last_errors.get(bot_id) or "").strip()
|
|
return str(record.last_error or self._last_errors.get(bot_id) or "").strip()
|
|
|
|
def parse_monitor_packet(self, line: str) -> Optional[Dict[str, Any]]:
|
|
return self._parse_log_line(str(line or "").strip())
|
|
|
|
    def _drain_stdout(
        self,
        bot_id: str,
        record: _NativeProcessRecord,
        callback: Optional[Callable[[str, dict], None]] = None,
    ) -> None:
        """Pump the child's stdout into the log file until EOF or stop.

        Each non-blank line is appended to the log, parsed into a monitor
        packet (forwarded to `callback` when one is produced), and always
        forwarded as a RAW_LOG packet. Runs on a daemon thread.
        """
        stream = record.process.stdout
        if stream is None:
            return
        try:
            # readline-based iteration: "" sentinel marks EOF in text mode.
            for raw_line in iter(stream.readline, ""):
                if record.stop_event.is_set():
                    break
                line = str(raw_line or "").rstrip("\r\n")
                if not line:
                    continue
                try:
                    record.log_handle.write(f"{line}\n")
                    record.log_handle.flush()
                except Exception:
                    # The log handle may already be closed during teardown.
                    pass
                if callback:
                    parsed = self._parse_log_line(line)
                    if parsed:
                        callback(bot_id, parsed)
                    # Raw line is always forwarded, even when a packet parsed.
                    callback(bot_id, {"type": "RAW_LOG", "text": line})
        finally:
            try:
                stream.close()
            except Exception:
                pass
            try:
                record.log_handle.flush()
            except Exception:
                pass
|
|
|
|
def _cleanup_record(self, bot_id: str, record: _NativeProcessRecord) -> None:
|
|
try:
|
|
record.stop_event.set()
|
|
except Exception:
|
|
pass
|
|
try:
|
|
if record.log_handle and not record.log_handle.closed:
|
|
record.log_handle.flush()
|
|
record.log_handle.close()
|
|
except Exception:
|
|
pass
|
|
|
|
def _set_last_error(self, bot_id: str, message: str) -> None:
|
|
normalized_bot_id = str(bot_id or "").strip()
|
|
self._last_errors[normalized_bot_id] = str(message or "").strip()
|
|
record = self._records.get(normalized_bot_id)
|
|
if record is None:
|
|
return
|
|
record.last_error = self._last_errors[normalized_bot_id]
|
|
|
|
def _resolve_workdir(
|
|
self,
|
|
bot_root: str,
|
|
*,
|
|
native_workdir: Optional[str] = None,
|
|
env_vars: Optional[Dict[str, str]] = None,
|
|
) -> str:
|
|
configured = str(native_workdir or (env_vars or {}).get("EDGE_NATIVE_WORKDIR") or EDGE_NATIVE_WORKDIR or "").strip()
|
|
if configured:
|
|
return os.path.abspath(configured)
|
|
return os.path.abspath(bot_root)
|
|
|
|
def _resolve_dashboard_endpoint(self, bot_id: str, env_vars: Dict[str, str]) -> tuple[str, int, str]:
|
|
host = str(env_vars.get("DASHBOARD_HOST") or os.getenv("EDGE_NATIVE_DASHBOARD_HOST") or "127.0.0.1").strip() or "127.0.0.1"
|
|
raw_port = str(env_vars.get("DASHBOARD_PORT") or os.getenv("EDGE_NATIVE_DASHBOARD_PORT") or "").strip()
|
|
try:
|
|
port = int(raw_port) if raw_port else self._default_dashboard_port(bot_id)
|
|
except Exception:
|
|
port = self._default_dashboard_port(bot_id)
|
|
port = max(1, min(port, 65535))
|
|
url = str(env_vars.get("DASHBOARD_URL") or os.getenv("EDGE_NATIVE_DASHBOARD_URL") or f"http://{host}:{port}/chat").strip()
|
|
if not url:
|
|
url = f"http://{host}:{port}/chat"
|
|
return host, port, url
|
|
|
|
def _build_launch_command(self, *, base_command: List[str], config_path: str, workspace_dir: str) -> List[str]:
|
|
command = list(base_command)
|
|
has_config_flag = any(part in {"--config", "-c"} for part in command)
|
|
has_workspace_flag = any(part in {"--workspace", "-w"} for part in command)
|
|
if not has_config_flag:
|
|
command.extend(["--config", config_path])
|
|
if not has_workspace_flag:
|
|
command.extend(["--workspace", workspace_dir])
|
|
return command
|
|
|
|
def _resolve_launch_command(self, *, native_command: Optional[str], env_vars: Dict[str, str]) -> List[str]:
|
|
explicit = str(native_command or "").strip()
|
|
if explicit:
|
|
return self._parse_launcher_command(explicit)
|
|
configured = str(env_vars.get("EDGE_NATIVE_COMMAND") or "").strip()
|
|
if configured:
|
|
rows = self._parse_launcher_command(configured)
|
|
if rows:
|
|
return rows
|
|
return list(self._command)
|
|
|
|
@staticmethod
|
|
def _parse_launcher_command(raw_command: str) -> List[str]:
|
|
text = str(raw_command or "").strip()
|
|
if not text:
|
|
return []
|
|
if text.startswith("[") and text.endswith("]"):
|
|
try:
|
|
payload = json.loads(text)
|
|
if isinstance(payload, list):
|
|
rows = [str(item or "").strip() for item in payload if str(item or "").strip()]
|
|
if rows:
|
|
return rows
|
|
except Exception:
|
|
pass
|
|
if "," in text and any(mark in text for mark in ['"', "'"]):
|
|
try:
|
|
rows = [str(item or "").strip() for item in next(csv.reader([text], skipinitialspace=True)) if str(item or "").strip()]
|
|
if rows:
|
|
return rows
|
|
except Exception:
|
|
pass
|
|
try:
|
|
return [str(item or "").strip() for item in shlex.split(text) if str(item or "").strip()]
|
|
except Exception:
|
|
return []
|
|
|
|
@staticmethod
|
|
def _is_launch_command_available(command: List[str]) -> bool:
|
|
if not command:
|
|
return False
|
|
return bool(shutil.which(command[0]))
|
|
|
|
@staticmethod
|
|
def _render_command(command: List[str]) -> str:
|
|
return " ".join(str(part or "").strip() for part in command if str(part or "").strip())
|
|
|
|
def _log_path(self, bot_id: str) -> str:
|
|
config_path = self._config_path(bot_id)
|
|
return os.path.join(os.path.dirname(config_path), "runtime", "native.log")
|
|
|
|
    def _config_path(self, bot_id: str, workspace_root: Optional[str] = None) -> str:
        """Locate the bot's nanobot config.json.

        Resolution order: (1) under an explicitly supplied workspace_root,
        but only when the file actually exists there; (2) under the root
        recorded in the bot's runtime-target.json, again only if present;
        (3) the default path under the global bot root (returned even when
        the file does not exist — callers check existence themselves).
        """
        configured_root = str(workspace_root or "").strip()
        if configured_root:
            external_config = os.path.abspath(
                os.path.join(
                    os.path.abspath(os.path.expanduser(configured_root)),
                    bot_id,
                    ".nanobot",
                    "config.json",
                )
            )
            if os.path.isfile(external_config):
                return external_config
        inferred_root = self._workspace_root_from_runtime_target(bot_id)
        if inferred_root:
            inferred_config = os.path.abspath(os.path.join(inferred_root, bot_id, ".nanobot", "config.json"))
            if os.path.isfile(inferred_config):
                return inferred_config
        return os.path.join(self._bot_root(bot_id), ".nanobot", "config.json")
|
|
|
|
    def _bot_root(self, bot_id: str) -> str:
        """Default per-bot state directory under the global workspace root."""
        return os.path.abspath(os.path.join(EDGE_BOTS_WORKSPACE_ROOT, bot_id))
|
|
|
|
    def _workspace_dir(self, *, bot_id: str, workspace_root: Optional[str] = None) -> str:
        """Resolve the bot's workspace directory.

        Precedence: explicit workspace_root argument, then the workspace
        declared in the bot's config file, then the default location under
        the bot root.
        """
        configured_root = str(workspace_root or "").strip()
        if configured_root:
            normalized_root = os.path.abspath(os.path.expanduser(configured_root))
            return os.path.abspath(os.path.join(normalized_root, bot_id, ".nanobot", "workspace"))
        config_workspace = self._workspace_dir_from_config(bot_id)
        if config_workspace:
            return config_workspace
        return os.path.abspath(os.path.join(self._bot_root(bot_id), ".nanobot", "workspace"))
|
|
|
|
    def _workspace_dir_from_config(self, bot_id: str) -> Optional[str]:
        """Read agents.defaults.workspace from the bot's config.json.

        Returns the absolute, user-expanded path, or None when the config is
        missing, malformed, or does not declare a workspace.
        """
        config_path = self._config_path(bot_id)
        if not os.path.isfile(config_path):
            return None
        try:
            with open(config_path, "r", encoding="utf-8") as fh:
                payload = json.load(fh)
            if not isinstance(payload, dict):
                return None
            # Defensive extraction: tolerate non-dict `agents`/`defaults`.
            agents = payload.get("agents") if isinstance(payload.get("agents"), dict) else {}
            defaults = agents.get("defaults") if isinstance(agents.get("defaults"), dict) else {}
            workspace = str(defaults.get("workspace") or "").strip()
            if not workspace:
                return None
            return os.path.abspath(os.path.expanduser(workspace))
        except Exception:
            return None
|
|
|
|
    def _workspace_root_from_runtime_target(self, bot_id: str) -> str:
        """Read `workspace_root` from the bot's runtime-target.json.

        Returns the absolute, user-expanded root, or "" when the file is
        missing, malformed, or does not declare a root.
        """
        path = os.path.join(self._bot_root(bot_id), ".nanobot", "runtime-target.json")
        if not os.path.isfile(path):
            return ""
        try:
            with open(path, "r", encoding="utf-8") as fh:
                payload = json.load(fh)
            if not isinstance(payload, dict):
                return ""
            raw_root = str(payload.get("workspace_root") or "").strip()
            if not raw_root:
                return ""
            return os.path.abspath(os.path.expanduser(raw_root))
        except Exception:
            return ""
|
|
|
|
def _has_orphan_process(self, bot_id: str) -> bool:
|
|
return bool(self._find_orphan_processes(bot_id=bot_id, config_path=self._config_path(bot_id)))
|
|
|
|
    def _find_orphan_processes(self, *, bot_id: str, config_path: str) -> List[psutil.Process]:
        """Scan the process table for gateway processes using this config.

        A match is any process whose command line contains both the nanobot
        CLI gateway invocation and this bot's absolute config path.
        Processes that disappear or cannot be inspected are skipped.
        """
        matches: List[psutil.Process] = []
        normalized_config_path = os.path.abspath(config_path)
        for proc in psutil.process_iter(["pid", "cmdline"]):
            try:
                cmdline = [str(part or "") for part in (proc.info.get("cmdline") or [])]
                if not cmdline:
                    continue
                joined = " ".join(cmdline)
                # Must look like `... nanobot.cli.commands ... gateway ...`.
                if "nanobot.cli.commands" not in joined or " gateway" not in joined:
                    continue
                if normalized_config_path not in joined:
                    continue
                matches.append(proc)
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue
            except Exception:
                continue
        return matches
|
|
|
|
    def _terminate_orphan_processes(self, *, bot_id: str, config_path: str) -> int:
        """SIGTERM (then SIGKILL on timeout) orphan gateways for this config.

        Returns the number of processes stopped. Failures other than
        "already gone" are recorded via _set_last_error but do not abort the
        sweep. NOTE(review): uses POSIX signals — presumably this backend
        only runs on POSIX hosts; confirm for Windows deployments.
        """
        stopped = 0
        for proc in self._find_orphan_processes(bot_id=bot_id, config_path=config_path):
            try:
                os.kill(int(proc.pid), signal.SIGTERM)
                try:
                    proc.wait(timeout=5)
                except psutil.TimeoutExpired:
                    # Graceful stop timed out; force kill.
                    os.kill(int(proc.pid), signal.SIGKILL)
                    proc.wait(timeout=3)
                stopped += 1
            except (psutil.NoSuchProcess, ProcessLookupError):
                # Already exited between discovery and signalling.
                continue
            except Exception as exc:
                self._set_last_error(bot_id, f"failed to cleanup orphan native process: {exc}")
        return stopped
|
|
|
|
@staticmethod
|
|
def _wait_for_dashboard_ready(record: _NativeProcessRecord, timeout_seconds: float = 8.0) -> bool:
|
|
deadline = time.monotonic() + max(1.0, float(timeout_seconds or 8.0))
|
|
while time.monotonic() < deadline:
|
|
if record.process.poll() is not None:
|
|
return False
|
|
try:
|
|
with socket.create_connection((record.dashboard_host, record.dashboard_port), timeout=0.5):
|
|
return True
|
|
except OSError:
|
|
time.sleep(0.2)
|
|
continue
|
|
return False
|
|
|
|
@staticmethod
|
|
def _default_dashboard_port(bot_id: str) -> int:
|
|
digest = hashlib.sha1(str(bot_id or "").strip().encode("utf-8")).hexdigest()
|
|
return 19000 + (int(digest[:6], 16) % 2000)
|
|
|
|
@staticmethod
|
|
def _normalize_cpu_limit(value: Optional[float]) -> Optional[float]:
|
|
if value is None:
|
|
return None
|
|
try:
|
|
return round(float(value), 2)
|
|
except Exception:
|
|
return None
|
|
|
|
@staticmethod
|
|
def _normalize_memory_limit(value: Optional[int]) -> Optional[int]:
|
|
if value is None:
|
|
return None
|
|
try:
|
|
return max(0, int(value)) * 1024 * 1024
|
|
except Exception:
|
|
return None
|
|
|
|
@staticmethod
|
|
def _normalize_storage_limit(value: Optional[int]) -> Optional[int]:
|
|
if value is None:
|
|
return None
|
|
try:
|
|
return max(0, int(value)) * 1024 * 1024 * 1024
|
|
except Exception:
|
|
return None
|
|
|
|
    def _calc_workspace_used_bytes(self, bot_id: str) -> int:
        """Sum the sizes of all files under the bot's workspace directory.

        Unreadable files are skipped; a missing directory yields 0 (os.walk
        simply produces nothing).
        """
        total = 0
        root = self._workspace_dir(bot_id=bot_id)
        for current_root, _, files in os.walk(root):
            for filename in files:
                path = os.path.join(current_root, filename)
                try:
                    total += int(os.path.getsize(path))
                except Exception:
                    # File vanished or is unreadable; ignore it.
                    continue
        return total
|
|
|
|
@staticmethod
|
|
def _now() -> str:
|
|
return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
|
|
|
    @staticmethod
    def _parse_monitor_packet_json(line: str) -> Optional[Dict[str, Any]]:
        """Decode a dashboard JSON packet embedded in a log line.

        Packets are delimited by __DASHBOARD_DATA_START__ / __DASHBOARD_DATA_END__
        markers. Returns a normalized dict of type AGENT_STATE,
        ASSISTANT_MESSAGE, or BUS_EVENT — or None when the line carries no
        usable packet or the JSON is malformed.
        """
        if "__DASHBOARD_DATA_START__" not in line or "__DASHBOARD_DATA_END__" not in line:
            return None
        try:
            raw_json = line.split("__DASHBOARD_DATA_START__", 1)[1].split("__DASHBOARD_DATA_END__", 1)[0].strip()
            data = json.loads(raw_json)
            event_type = str(data.get("type", "")).upper()
            # `content` falls back to `text` so either field name works.
            content = str(data.get("content") or data.get("text") or "").strip()
            # Normalize media paths to forward slashes.
            media = [str(v).strip().replace("\\", "/") for v in (data.get("media") or []) if str(v).strip()]
            is_progress = bool(data.get("is_progress", False))
            is_tool = bool(data.get("is_tool", False))
            usage = data.get("usage") if isinstance(data.get("usage"), dict) else None
            # Empty strings collapse to None for these optional fields.
            request_id = str(data.get("request_id") or "").strip() or None
            provider = str(data.get("provider") or "").strip() or None
            model = str(data.get("model") or "").strip() or None

            if event_type == "AGENT_STATE":
                payload = data.get("payload") or {}
                # State falls back to one inferred from the tool flag.
                state = str(payload.get("state") or data.get("state") or ("TOOL_CALL" if is_tool else "THINKING"))
                action_msg = str(payload.get("action_msg") or payload.get("msg") or content)
                return {
                    "type": "AGENT_STATE",
                    "channel": "dashboard",
                    "payload": {"state": state, "action_msg": action_msg},
                    "request_id": request_id,
                }

            if event_type == "ASSISTANT_MESSAGE":
                # Only surface assistant messages that carry text or media.
                if content or media:
                    return {
                        "type": "ASSISTANT_MESSAGE",
                        "channel": "dashboard",
                        "text": content,
                        "media": media,
                        "usage": usage,
                        "request_id": request_id,
                        "provider": provider,
                        "model": model,
                    }
                return None

            if event_type == "BUS_EVENT" or is_progress:
                return {
                    "type": "BUS_EVENT",
                    "channel": "dashboard",
                    "content": content,
                    "media": media,
                    "is_progress": is_progress,
                    "is_tool": is_tool,
                    "usage": usage,
                    "request_id": request_id,
                    "provider": provider,
                    "model": model,
                }

            # Unknown event type: treat any payload with text/media as an
            # assistant message.
            if content or media:
                return {
                    "type": "ASSISTANT_MESSAGE",
                    "channel": "dashboard",
                    "text": content,
                    "media": media,
                    "usage": usage,
                    "request_id": request_id,
                    "provider": provider,
                    "model": model,
                }
        except Exception:
            # Malformed JSON or unexpected structure: not a packet.
            return None
        return None
|
|
|
|
    @classmethod
    def _parse_log_line(cls, line: str) -> Optional[Dict[str, Any]]:
        """Turn one gateway log line into a monitor packet, or None.

        Tries, in order: an embedded dashboard JSON packet, known textual
        patterns ("Processing message from", "Response to", "tool call:"),
        and finally a generic error heuristic.
        """
        if "__DASHBOARD_DATA_START__" in line:
            packet = cls._parse_monitor_packet_json(line)
            if packet:
                return packet

        # "Processing message from <channel>:<sender>: <text>" -> THINKING.
        process_match = re.search(r"Processing message from ([\w\-]+):[^:]+:\s*(.+)$", line)
        if process_match:
            channel = process_match.group(1).strip().lower()
            action_msg = process_match.group(2).strip()
            return {
                "type": "AGENT_STATE",
                "channel": channel,
                "payload": {
                    "state": "THINKING",
                    # Cap message size to keep packets bounded.
                    "action_msg": action_msg[:4000],
                },
            }

        # "Response to <channel>:<target>: <text>".
        response_match = re.search(r"Response to ([\w\-]+):[^:]+:\s*(.+)$", line)
        if response_match:
            channel = response_match.group(1).strip().lower()
            action_msg = response_match.group(2).strip()
            if channel == "dashboard":
                # Dashboard responses surface directly as assistant messages.
                return {
                    "type": "ASSISTANT_MESSAGE",
                    "channel": "dashboard",
                    "text": action_msg[:4000],
                }
            return {
                "type": "AGENT_STATE",
                "channel": channel,
                "payload": {
                    "state": "SUCCESS",
                    "action_msg": action_msg[:4000],
                },
            }

        # "tool call: <description>" -> TOOL_CALL state (case-insensitive).
        tool_call_match = re.search(r"tool call:\s*(.+)$", line, re.IGNORECASE)
        if tool_call_match:
            return {
                "type": "AGENT_STATE",
                "payload": {
                    "state": "TOOL_CALL",
                    "action_msg": tool_call_match.group(1).strip()[:4000],
                },
            }

        # Generic error heuristic: any line mentioning error/traceback.
        lower = line.lower()
        if "error" in lower or "traceback" in lower:
            return {
                "type": "AGENT_STATE",
                "payload": {"state": "ERROR", "action_msg": "执行异常,请检查日志"},
            }
        return None
|