dashboard-nanobot/backend/services/topic_service.py

565 lines
18 KiB
Python
Raw Normal View History

2026-03-13 06:40:54 +00:00
import json
import re
from datetime import datetime
from typing import Any, Dict, List, Optional
2026-04-04 16:29:37 +00:00
from fastapi import HTTPException
2026-03-13 06:40:54 +00:00
from sqlmodel import Session, select
2026-04-04 16:29:37 +00:00
from models.bot import BotInstance
2026-03-13 06:40:54 +00:00
from models.topic import TopicItem, TopicTopic
# Window, in seconds, within which a repeated dedupe_key suppresses publishing.
TOPIC_DEDUPE_WINDOW_SECONDS = 10 * 60
# Accepted item levels; the publish path falls back to "info" for other values.
TOPIC_LEVEL_SET = {"info", "warn", "error", "success"}
# Topic key shape: one [a-z0-9] character followed by up to 63 of [a-z0-9_.-].
_TOPIC_KEY_RE = re.compile(r"^[a-z0-9][a-z0-9_.-]{0,63}$")
2026-04-04 16:29:37 +00:00
# Un-prefixed alias of the compiled topic-key pattern.
TOPIC_KEY_RE = _TOPIC_KEY_RE
2026-03-13 06:40:54 +00:00
def _as_bool(value: Any) -> bool:
if isinstance(value, bool):
return value
text = str(value or "").strip().lower()
return text in {"1", "true", "yes", "on", "y"}
def _normalize_topic_key(raw: Any) -> str:
value = str(raw or "").strip().lower()
if not value:
return ""
return value
def _parse_json_dict(raw: str) -> Dict[str, Any]:
text = str(raw or "").strip()
if not text:
return {}
try:
data = json.loads(text)
return data if isinstance(data, dict) else {}
except Exception:
return {}
def _parse_json_list(raw: str) -> List[Any]:
text = str(raw or "").strip()
if not text:
return []
try:
data = json.loads(text)
except Exception:
return []
return data if isinstance(data, list) else []
def _topic_to_dict(row: TopicTopic) -> Dict[str, Any]:
    """Convert a topic row into a plain dict.

    The key is trimmed and lower-cased, missing text fields become empty
    strings, the stored JSON columns are decoded into dicts, and timestamps
    are rendered with ``isoformat()`` (``None`` when unset).
    """
    created = row.created_at
    updated = row.updated_at
    key = str(row.topic_key or "").strip().lower()
    routing = _parse_json_dict(row.routing_json or "{}")
    view_schema = _parse_json_dict(row.view_schema_json or "{}")
    return {
        "id": row.id,
        "bot_id": row.bot_id,
        "topic_key": key,
        "name": row.name or "",
        "description": row.description or "",
        "is_active": bool(row.is_active),
        "routing": routing,
        "view_schema": view_schema,
        "created_at": created.isoformat() if created else None,
        "updated_at": updated.isoformat() if updated else None,
    }
def _list_topics(session: Session, bot_id: str) -> List[Dict[str, Any]]:
    """Return every topic of ``bot_id`` as dicts.

    Rows are ordered by ``is_active`` descending, then by ``topic_key``
    ascending.
    """
    stmt = (
        select(TopicTopic)
        .where(TopicTopic.bot_id == bot_id)
        .order_by(TopicTopic.is_active.desc(), TopicTopic.topic_key.asc())
    )
    topics = session.exec(stmt).all()
    return list(map(_topic_to_dict, topics))
def _topic_item_to_dict(row: TopicItem) -> Dict[str, Any]:
    """Convert a topic item row into a plain dict.

    Missing text fields become empty strings, ``level`` defaults to
    ``"info"``, ``source`` defaults to ``"dashboard"``, the stored JSON
    columns are decoded, and ``created_at`` is rendered with ``isoformat()``
    (``None`` when unset).
    """
    created = row.created_at
    key = str(row.topic_key or "").strip().lower()
    level = str(row.level or "info").strip().lower()
    tags = _parse_json_list(row.tags_json or "[]")
    view = _parse_json_dict(row.view_json or "{}")
    return {
        "id": row.id,
        "bot_id": row.bot_id,
        "topic_key": key,
        "title": row.title or "",
        "content": row.content or "",
        "level": level,
        "tags": tags,
        "view": view,
        "source": row.source or "dashboard",
        "dedupe_key": row.dedupe_key or "",
        "is_read": bool(row.is_read),
        "created_at": created.isoformat() if created else None,
    }
def _topic_get_row(session: Session, bot_id: str, topic_key: str) -> Optional[TopicTopic]:
    """Fetch the topic row for ``bot_id`` and ``topic_key``.

    The key is normalized before the lookup. Returns ``None`` when the key
    normalizes to an empty string or no row matches.
    """
    key = _normalize_topic_key(topic_key)
    if key:
        stmt = (
            select(TopicTopic)
            .where(TopicTopic.bot_id == bot_id)
            .where(TopicTopic.topic_key == key)
            .limit(1)
        )
        return session.exec(stmt).first()
    return None
2026-04-04 16:29:37 +00:00
def _get_bot_or_404(session: Session, bot_id: str) -> BotInstance:
    """Load the bot identified by ``bot_id``.

    Raises:
        HTTPException: 404 when no such bot exists.
    """
    found = session.get(BotInstance, bot_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Bot not found")
2026-03-13 06:40:54 +00:00
def _normalize_topic_keywords(raw: Any) -> List[str]:
rows: List[str] = []
if isinstance(raw, list):
for item in raw:
text = str(item or "").strip().lower()
if text and text not in rows:
rows.append(text)
elif isinstance(raw, str):
text = raw.strip().lower()
if text:
rows.append(text)
return rows
def _topic_filter_reason(payload: Dict[str, Any]) -> str:
    """Explain why ``payload`` must not be published, or return ``""``.

    A payload is filtered when its ``is_progress`` or ``is_tool_hint`` flag
    is truthy, or when its ``source`` (falling back to ``type``) names one of
    the progress / tool-hint origins.
    """
    flag_reasons = (
        ("is_progress", "progress message is filtered"),
        ("is_tool_hint", "tool hint message is filtered"),
    )
    for flag, reason in flag_reasons:
        if _as_bool(payload.get(flag)):
            return reason

    origin = str(payload.get("source") or payload.get("type") or "").strip().lower()
    filtered_origins = ("progress", "tool_hint", "sendprogress", "sendtoolhints")
    if origin in filtered_origins:
        return f"{origin} message is filtered"
    return ""
def _topic_route_pick(
    session: Session,
    bot_id: str,
    payload: Dict[str, Any],
    requested_topic_key: str = "",
) -> Dict[str, Any]:
    """Choose the active topic a payload should be routed to.

    An explicitly requested key (argument, then ``payload["topic_key"]``,
    then ``payload["topic"]``) is decisive: it is accepted if that topic
    exists and is active, and otherwise the payload is left unrouted. With no
    requested key, each active topic's ``include_when`` / ``exclude_when``
    keywords are matched as substrings of the lower-cased title, content and
    tags.

    Args:
        session: Database session used to load the bot's topics.
        bot_id: Owner of the topics under consideration.
        payload: Message payload; reads ``topic_key``, ``topic``, ``title``,
            ``content``/``text`` and ``tags``.
        requested_topic_key: Optional explicit topic key.

    Returns:
        A dict with ``matched`` (bool), ``topic_key`` (str or ``None``),
        ``confidence`` (float) and ``reason`` (str).
    """
    active_topics = session.exec(
        select(TopicTopic)
        .where(TopicTopic.bot_id == bot_id)
        .where(TopicTopic.is_active == True)  # noqa: E712 - builds a SQL expression
        .order_by(TopicTopic.topic_key.asc())
    ).all()
    if not active_topics:
        return {
            "matched": False,
            "topic_key": None,
            "confidence": 1.0,
            "reason": "no active topic configured",
        }

    req_key = _normalize_topic_key(
        requested_topic_key or payload.get("topic_key") or payload.get("topic")
    )
    if req_key:
        row = _topic_get_row(session, bot_id, req_key)
        if row and bool(row.is_active):
            return {
                "matched": True,
                "topic_key": req_key,
                "confidence": 0.99,
                "reason": "explicit topic key accepted",
            }
        return {
            "matched": False,
            "topic_key": None,
            "confidence": 0.72,
            "reason": f"requested topic {req_key} unavailable or inactive",
        }

    # Only list-shaped tags count as routing evidence. A bare string would be
    # joined character by character and a non-iterable value would raise.
    raw_tags = payload.get("tags")
    tag_text = (
        " ".join(str(v or "").strip() for v in raw_tags)
        if isinstance(raw_tags, list)
        else ""
    )
    text = " ".join(
        [
            str(payload.get("title") or "").strip(),
            str(payload.get("content") or payload.get("text") or "").strip(),
            tag_text,
        ]
    ).strip().lower()
    if not text:
        return {
            "matched": False,
            "topic_key": None,
            "confidence": 1.0,
            "reason": "no routing evidence",
        }

    best_key = ""
    best_score = -10.0
    best_reason = "no topic matched"
    matched_include = False
    for topic in active_topics:
        key = _normalize_topic_key(topic.topic_key)
        if not key:
            continue
        routing = _parse_json_dict(topic.routing_json or "{}")
        include_when = _normalize_topic_keywords(routing.get("include_when"))
        exclude_when = _normalize_topic_keywords(routing.get("exclude_when"))
        try:
            priority = max(0, min(int(routing.get("priority", 0)), 100))
        except (TypeError, ValueError, OverflowError):
            priority = 0

        include_hits = [kw for kw in include_when if kw in text]
        if not include_hits:
            # A topic with no include hit is never a candidate.
            continue
        exclude_hits = [kw for kw in exclude_when if kw in text]
        matched_include = True

        # +2 per include hit, -3 per exclude hit; the clamped priority adds at
        # most 0.1, so it only separates otherwise equal scores.
        score = float(len(include_hits) * 2 - len(exclude_hits) * 3) + (priority / 1000.0)
        if score > best_score:
            best_score = score
            best_key = key
            best_reason = f"matched include_when: {', '.join(include_hits[:3])}"

    if not matched_include:
        return {
            "matched": False,
            "topic_key": None,
            "confidence": 0.68,
            "reason": "no include_when matched",
        }
    if best_score <= 0:
        return {
            "matched": False,
            "topic_key": None,
            "confidence": 0.68,
            "reason": "no positive routing score",
        }
    confidence = min(0.95, max(0.61, 0.61 + best_score / 12.0))
    return {
        "matched": True,
        "topic_key": best_key,
        "confidence": round(confidence, 3),
        "reason": best_reason,
    }
def _topic_publish_internal(session: Session, bot_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Route ``payload`` to a topic and persist it as a new item.

    Args:
        session: Database session; committed when an item is created.
        bot_id: Owner of the item.
        payload: Message payload; reads ``title``, ``content``/``text``,
            ``level``, ``tags``, ``topic_key``, ``dedupe_key``, ``view`` and
            ``source``.

    Returns:
        A result dict. ``published`` is ``True`` with ``item`` and ``route``
        on success. Otherwise it is ``False`` with ``skipped`` or ``deduped``
        set and a human-readable ``reason``.
    """
    filter_reason = _topic_filter_reason(payload)
    if filter_reason:
        return {
            "published": False,
            "skipped": True,
            "reason": filter_reason,
        }

    title = str(payload.get("title") or "").strip()
    content = str(payload.get("content") or payload.get("text") or "").strip()
    if not title and not content:
        return {
            "published": False,
            "skipped": True,
            "reason": "empty title/content",
        }

    level = str(payload.get("level") or "info").strip().lower()
    if level not in TOPIC_LEVEL_SET:
        level = "info"

    tags = payload.get("tags")
    tags_rows: List[str] = []
    if isinstance(tags, list):
        for tag in tags:
            # Truncate before the duplicate check so two long tags sharing
            # the same 64-character prefix are stored only once.
            text = str(tag or "").strip()[:64]
            if text and text not in tags_rows:
                tags_rows.append(text)

    route_result = _topic_route_pick(
        session,
        bot_id,
        payload,
        requested_topic_key=str(payload.get("topic_key") or ""),
    )
    if not bool(route_result.get("matched")):
        return {
            "published": False,
            "skipped": True,
            "reason": str(route_result.get("reason") or "no topic matched"),
            "route": route_result,
        }
    topic_key = _normalize_topic_key(route_result.get("topic_key"))
    if not topic_key:
        return {
            "published": False,
            "skipped": True,
            "reason": "invalid topic route result",
            "route": route_result,
        }
    row = _topic_get_row(session, bot_id, topic_key)
    if not row or not bool(row.is_active):
        return {
            "published": False,
            "skipped": True,
            "reason": f"topic {topic_key} unavailable or inactive",
            "route": route_result,
        }

    # Truncate to the stored length up front. Looking up the untruncated key
    # could never match a row saved with the 200-character prefix.
    dedupe_key = str(payload.get("dedupe_key") or "").strip()[:200]
    if dedupe_key:
        existing = session.exec(
            select(TopicItem)
            .where(TopicItem.bot_id == bot_id)
            .where(TopicItem.dedupe_key == dedupe_key)
            .order_by(TopicItem.id.desc())
            .limit(1)
        ).first()
        if existing and existing.created_at:
            age_s = (datetime.utcnow() - existing.created_at).total_seconds()
            if age_s <= TOPIC_DEDUPE_WINDOW_SECONDS:
                return {
                    "published": False,
                    "deduped": True,
                    "dedupe_window_seconds": TOPIC_DEDUPE_WINDOW_SECONDS,
                    "topic_key": _normalize_topic_key(existing.topic_key),
                    "reason": "dedupe_key hit within dedupe window",
                    "item": _topic_item_to_dict(existing),
                }

    view = payload.get("view")
    view_json = json.dumps(view, ensure_ascii=False) if isinstance(view, dict) else None
    source = str(payload.get("source") or "dashboard").strip().lower() or "dashboard"
    now = datetime.utcnow()
    item = TopicItem(
        bot_id=bot_id,
        topic_key=topic_key,
        title=title[:2000],
        content=content[:20000],
        level=level,
        tags_json=json.dumps(tags_rows, ensure_ascii=False) if tags_rows else None,
        view_json=view_json,
        source=source[:64],
        dedupe_key=dedupe_key or None,
        is_read=False,
        created_at=now,
    )
    session.add(item)
    session.commit()
    session.refresh(item)
    return {
        "published": True,
        "topic_key": topic_key,
        "item": _topic_item_to_dict(item),
        "route": route_result,
    }
2026-04-04 16:29:37 +00:00
def normalize_topic_key(raw: Any) -> str:
    """Return ``raw`` normalized by ``_normalize_topic_key``."""
    normalized = _normalize_topic_key(raw)
    return normalized
def list_topics(session: Session, bot_id: str) -> List[Dict[str, Any]]:
    """List the topics of an existing bot as dicts.

    Raises:
        HTTPException: 404 when the bot does not exist.
    """
    _get_bot_or_404(session, bot_id)
    topics = _list_topics(session, bot_id)
    return topics
def create_topic(
    session: Session,
    *,
    bot_id: str,
    topic_key: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    is_active: bool = True,
    routing: Optional[Dict[str, Any]] = None,
    view_schema: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Create a topic for a bot and return it as a dict.

    The key is normalized and must match ``TOPIC_KEY_RE``. A blank ``name``
    falls back to the key itself.

    Raises:
        HTTPException: 404 when the bot does not exist; 400 when the key is
            missing, malformed, or already used by this bot.
    """
    _get_bot_or_404(session, bot_id)

    key = _normalize_topic_key(topic_key)
    if not key:
        raise HTTPException(status_code=400, detail="topic_key is required")
    if TOPIC_KEY_RE.fullmatch(key) is None:
        raise HTTPException(status_code=400, detail="invalid topic_key")
    if _topic_get_row(session, bot_id, key):
        raise HTTPException(status_code=400, detail=f"Topic already exists: {key}")

    timestamp = datetime.utcnow()
    display_name = str(name or key).strip() or key
    topic = TopicTopic(
        bot_id=bot_id,
        topic_key=key,
        name=display_name,
        description=str(description or "").strip(),
        is_active=bool(is_active),
        is_default_fallback=False,
        routing_json=json.dumps(routing or {}, ensure_ascii=False),
        view_schema_json=json.dumps(view_schema or {}, ensure_ascii=False),
        created_at=timestamp,
        updated_at=timestamp,
    )
    session.add(topic)
    session.commit()
    session.refresh(topic)
    return _topic_to_dict(topic)
def update_topic(
    session: Session,
    *,
    bot_id: str,
    topic_key: str,
    updates: Dict[str, Any],
) -> Dict[str, Any]:
    """Apply a partial update to a topic and return the refreshed dict.

    Only keys present in ``updates`` are touched (``name``, ``description``,
    ``is_active``, ``routing``, ``view_schema``). ``is_default_fallback`` is
    reset to ``False`` and ``updated_at`` is refreshed on every call.

    Raises:
        HTTPException: 404 when the bot or topic does not exist; 400 when the
            key is missing.
    """
    _get_bot_or_404(session, bot_id)

    key = _normalize_topic_key(topic_key)
    if not key:
        raise HTTPException(status_code=400, detail="topic_key is required")
    topic = _topic_get_row(session, bot_id, key)
    if not topic:
        raise HTTPException(status_code=404, detail="Topic not found")

    if "name" in updates:
        # A blank name falls back to the topic key.
        topic.name = str(updates.get("name") or "").strip() or topic.topic_key
    if "description" in updates:
        topic.description = str(updates.get("description") or "").strip()
    if "is_active" in updates:
        topic.is_active = bool(updates.get("is_active"))
    # Both JSON-backed fields are serialized the same way.
    for field, column in (("routing", "routing_json"), ("view_schema", "view_schema_json")):
        if field in updates:
            setattr(topic, column, json.dumps(updates.get(field) or {}, ensure_ascii=False))

    topic.is_default_fallback = False
    topic.updated_at = datetime.utcnow()
    session.add(topic)
    session.commit()
    session.refresh(topic)
    return _topic_to_dict(topic)
def delete_topic(session: Session, *, bot_id: str, topic_key: str) -> Dict[str, Any]:
    """Delete a topic together with all of its items.

    Returns:
        ``{"status": "deleted", "bot_id": ..., "topic_key": ...}``.

    Raises:
        HTTPException: 404 when the bot or topic does not exist; 400 when the
            key is missing.
    """
    _get_bot_or_404(session, bot_id)

    key = _normalize_topic_key(topic_key)
    if not key:
        raise HTTPException(status_code=400, detail="topic_key is required")
    topic = _topic_get_row(session, bot_id, key)
    if not topic:
        raise HTTPException(status_code=404, detail="Topic not found")

    items_stmt = (
        select(TopicItem)
        .where(TopicItem.bot_id == bot_id)
        .where(TopicItem.topic_key == key)
    )
    # Items are removed before the topic row, all within one commit.
    for child in session.exec(items_stmt).all():
        session.delete(child)
    session.delete(topic)
    session.commit()
    return {"status": "deleted", "bot_id": bot_id, "topic_key": key}
def _count_topic_items(
    session: Session,
    bot_id: str,
    topic_key: Optional[str] = None,
    unread_only: bool = False,
) -> int:
    """Count a bot's topic items with a SQL ``COUNT`` instead of loading rows.

    Args:
        session: Database session to query.
        bot_id: Owner of the items.
        topic_key: Optional topic filter; blank means all topics.
        unread_only: When true, count only items that are not marked read.

    Returns:
        The number of matching items.
    """
    # Local import keeps this block self-contained; ``func`` and ``or_`` are
    # re-exported by sqlmodel, which this module already depends on.
    from sqlmodel import func, or_

    stmt = select(func.count()).select_from(TopicItem).where(TopicItem.bot_id == bot_id)
    normalized_topic_key = _normalize_topic_key(topic_key or "")
    if normalized_topic_key:
        stmt = stmt.where(TopicItem.topic_key == normalized_topic_key)
    if unread_only:
        # A NULL is_read counts as unread, matching a Python-side
        # ``not bool(row.is_read)`` check.
        stmt = stmt.where(
            or_(
                TopicItem.is_read == False,  # noqa: E712 - builds a SQL expression
                TopicItem.is_read.is_(None),
            )
        )
    return int(session.exec(stmt).one() or 0)
def list_topic_items(
    session: Session,
    *,
    bot_id: str,
    topic_key: Optional[str] = None,
    cursor: Optional[int] = None,
    limit: int = 50,
) -> Dict[str, Any]:
    """Return one page of a bot's topic items, newest first.

    Pagination is keyset-based on ``id``: pass the returned ``next_cursor``
    back as ``cursor`` to get the following page (items with ``id`` strictly
    below the cursor).

    Args:
        session: Database session to query.
        bot_id: Owner of the items.
        topic_key: Optional topic filter; blank means all topics.
        cursor: Exclusive upper bound on ``id``; ignored unless positive.
        limit: Page size, clamped to 1..100 (50 when falsy).

    Returns:
        A dict with ``bot_id``, ``topic_key``, ``items``, ``next_cursor``
        (``None`` on the last page), ``unread_count`` for the filtered topic
        and ``total_unread_count`` across all topics.

    Raises:
        HTTPException: 404 when the bot does not exist.
    """
    _get_bot_or_404(session, bot_id)
    normalized_limit = max(1, min(int(limit or 50), 100))
    stmt = select(TopicItem).where(TopicItem.bot_id == bot_id)
    normalized_topic_key = _normalize_topic_key(topic_key or "")
    if normalized_topic_key:
        stmt = stmt.where(TopicItem.topic_key == normalized_topic_key)
    if cursor is not None:
        normalized_cursor = int(cursor)
        if normalized_cursor > 0:
            stmt = stmt.where(TopicItem.id < normalized_cursor)

    # Fetch one extra row purely to learn whether another page exists.
    rows = session.exec(stmt.order_by(TopicItem.id.desc()).limit(normalized_limit + 1)).all()
    has_more = len(rows) > normalized_limit
    page = rows[:normalized_limit]
    # The cursor must be the last row actually returned. Using the look-ahead
    # row would skip it, because the next page filters on ``id < cursor``.
    next_cursor: Optional[int] = page[-1].id if has_more else None
    return {
        "bot_id": bot_id,
        "topic_key": normalized_topic_key or None,
        "items": [_topic_item_to_dict(row) for row in page],
        "next_cursor": next_cursor,
        "unread_count": _count_topic_items(session, bot_id, normalized_topic_key, unread_only=True),
        "total_unread_count": _count_topic_items(session, bot_id, unread_only=True),
    }
def get_topic_item_stats(session: Session, *, bot_id: str) -> Dict[str, Any]:
    """Summarize a bot's topic items.

    Returns:
        A dict with ``bot_id``, ``total_count``, ``unread_count`` and
        ``latest_item_id`` (``None`` when there is no item with a truthy id).

    Raises:
        HTTPException: 404 when the bot does not exist.
    """
    _get_bot_or_404(session, bot_id)
    newest_stmt = (
        select(TopicItem)
        .where(TopicItem.bot_id == bot_id)
        .order_by(TopicItem.id.desc())
        .limit(1)
    )
    newest = session.exec(newest_stmt).first()

    total = _count_topic_items(session, bot_id)
    unread = _count_topic_items(session, bot_id, unread_only=True)
    latest_id: Optional[int] = None
    if newest and newest.id:
        latest_id = int(newest.id or 0)
    return {
        "bot_id": bot_id,
        "total_count": total,
        "unread_count": unread,
        "latest_item_id": latest_id,
    }
def mark_topic_item_read(session: Session, *, bot_id: str, item_id: int) -> Dict[str, Any]:
    """Mark one topic item of a bot as read and return it.

    Returns:
        ``{"status": "updated", "bot_id": ..., "item": ...}``; the same shape
        is returned whether or not the item was already read.

    Raises:
        HTTPException: 404 when the bot or the item does not exist.
    """
    _get_bot_or_404(session, bot_id)
    # Filtering on bot_id as well as id means an item of another bot is
    # reported as not found.
    row = session.exec(
        select(TopicItem)
        .where(TopicItem.bot_id == bot_id)
        .where(TopicItem.id == item_id)
        .limit(1)
    ).first()
    if not row:
        raise HTTPException(status_code=404, detail="Topic item not found")
    # NOTE(review): this view of the file lost its indentation; the write below
    # is assumed to be nested under the "not yet read" check - confirm.
    if not bool(row.is_read):
        row.is_read = True
        session.add(row)
        session.commit()
        session.refresh(row)
    return {
        "status": "updated",
        "bot_id": bot_id,
        "item": _topic_item_to_dict(row),
    }
def delete_topic_item(session: Session, *, bot_id: str, item_id: int) -> Dict[str, Any]:
    """Delete one topic item of a bot and return a snapshot of it.

    Returns:
        ``{"status": "deleted", "bot_id": ..., "item": ...}`` where ``item``
        is the dict form captured before deletion.

    Raises:
        HTTPException: 404 when the bot or the item does not exist.
    """
    _get_bot_or_404(session, bot_id)
    lookup = (
        select(TopicItem)
        .where(TopicItem.bot_id == bot_id)
        .where(TopicItem.id == item_id)
        .limit(1)
    )
    target = session.exec(lookup).first()
    if not target:
        raise HTTPException(status_code=404, detail="Topic item not found")

    # Serialize first: the row is no longer usable once deleted and committed.
    snapshot = _topic_item_to_dict(target)
    session.delete(target)
    session.commit()
    return {"status": "deleted", "bot_id": bot_id, "item": snapshot}
def publish_topic_item(session: Session, bot_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Publish ``payload`` as a topic item for an existing bot.

    Returns:
        The result dict produced by ``_topic_publish_internal``.

    Raises:
        HTTPException: 404 when the bot does not exist.
    """
    _get_bot_or_404(session, bot_id)
    result = _topic_publish_internal(session, bot_id, payload)
    return result