565 lines
18 KiB
Python
565 lines
18 KiB
Python
import json
|
|
import re
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from fastapi import HTTPException
|
|
from sqlmodel import Session, select
|
|
|
|
from models.bot import BotInstance
|
|
from models.topic import TopicItem, TopicTopic
|
|
|
|
TOPIC_DEDUPE_WINDOW_SECONDS = 10 * 60
|
|
TOPIC_LEVEL_SET = {"info", "warn", "error", "success"}
|
|
_TOPIC_KEY_RE = re.compile(r"^[a-z0-9][a-z0-9_.-]{0,63}$")
|
|
TOPIC_KEY_RE = _TOPIC_KEY_RE
|
|
|
|
|
|
def _as_bool(value: Any) -> bool:
|
|
if isinstance(value, bool):
|
|
return value
|
|
text = str(value or "").strip().lower()
|
|
return text in {"1", "true", "yes", "on", "y"}
|
|
|
|
|
|
def _normalize_topic_key(raw: Any) -> str:
|
|
value = str(raw or "").strip().lower()
|
|
if not value:
|
|
return ""
|
|
return value
|
|
|
|
|
|
def _parse_json_dict(raw: str) -> Dict[str, Any]:
|
|
text = str(raw or "").strip()
|
|
if not text:
|
|
return {}
|
|
try:
|
|
data = json.loads(text)
|
|
return data if isinstance(data, dict) else {}
|
|
except Exception:
|
|
return {}
|
|
|
|
|
|
def _parse_json_list(raw: str) -> List[Any]:
|
|
text = str(raw or "").strip()
|
|
if not text:
|
|
return []
|
|
try:
|
|
data = json.loads(text)
|
|
except Exception:
|
|
return []
|
|
return data if isinstance(data, list) else []
|
|
|
|
|
|
def _topic_to_dict(row: TopicTopic) -> Dict[str, Any]:
|
|
return {
|
|
"id": row.id,
|
|
"bot_id": row.bot_id,
|
|
"topic_key": str(row.topic_key or "").strip().lower(),
|
|
"name": row.name or "",
|
|
"description": row.description or "",
|
|
"is_active": bool(row.is_active),
|
|
"routing": _parse_json_dict(row.routing_json or "{}"),
|
|
"view_schema": _parse_json_dict(row.view_schema_json or "{}"),
|
|
"created_at": row.created_at.isoformat() if row.created_at else None,
|
|
"updated_at": row.updated_at.isoformat() if row.updated_at else None,
|
|
}
|
|
|
|
|
|
def _list_topics(session: Session, bot_id: str) -> List[Dict[str, Any]]:
|
|
rows = session.exec(
|
|
select(TopicTopic)
|
|
.where(TopicTopic.bot_id == bot_id)
|
|
.order_by(TopicTopic.is_active.desc(), TopicTopic.topic_key.asc())
|
|
).all()
|
|
return [_topic_to_dict(row) for row in rows]
|
|
|
|
|
|
def _topic_item_to_dict(row: TopicItem) -> Dict[str, Any]:
|
|
return {
|
|
"id": row.id,
|
|
"bot_id": row.bot_id,
|
|
"topic_key": str(row.topic_key or "").strip().lower(),
|
|
"title": row.title or "",
|
|
"content": row.content or "",
|
|
"level": str(row.level or "info").strip().lower(),
|
|
"tags": _parse_json_list(row.tags_json or "[]"),
|
|
"view": _parse_json_dict(row.view_json or "{}"),
|
|
"source": row.source or "dashboard",
|
|
"dedupe_key": row.dedupe_key or "",
|
|
"is_read": bool(row.is_read),
|
|
"created_at": row.created_at.isoformat() if row.created_at else None,
|
|
}
|
|
|
|
|
|
def _topic_get_row(session: Session, bot_id: str, topic_key: str) -> Optional[TopicTopic]:
|
|
normalized = _normalize_topic_key(topic_key)
|
|
if not normalized:
|
|
return None
|
|
return session.exec(
|
|
select(TopicTopic)
|
|
.where(TopicTopic.bot_id == bot_id)
|
|
.where(TopicTopic.topic_key == normalized)
|
|
.limit(1)
|
|
).first()
|
|
|
|
|
|
def _get_bot_or_404(session: Session, bot_id: str) -> BotInstance:
|
|
bot = session.get(BotInstance, bot_id)
|
|
if not bot:
|
|
raise HTTPException(status_code=404, detail="Bot not found")
|
|
return bot
|
|
|
|
|
|
def _normalize_topic_keywords(raw: Any) -> List[str]:
|
|
rows: List[str] = []
|
|
if isinstance(raw, list):
|
|
for item in raw:
|
|
text = str(item or "").strip().lower()
|
|
if text and text not in rows:
|
|
rows.append(text)
|
|
elif isinstance(raw, str):
|
|
text = raw.strip().lower()
|
|
if text:
|
|
rows.append(text)
|
|
return rows
|
|
|
|
|
|
def _topic_filter_reason(payload: Dict[str, Any]) -> str:
|
|
if _as_bool(payload.get("is_progress")):
|
|
return "progress message is filtered"
|
|
if _as_bool(payload.get("is_tool_hint")):
|
|
return "tool hint message is filtered"
|
|
source = str(payload.get("source") or payload.get("type") or "").strip().lower()
|
|
if source in {"progress", "tool_hint", "sendprogress", "sendtoolhints"}:
|
|
return f"{source} message is filtered"
|
|
return ""
|
|
|
|
|
|
def _topic_route_pick(
|
|
session: Session,
|
|
bot_id: str,
|
|
payload: Dict[str, Any],
|
|
requested_topic_key: str = "",
|
|
) -> Dict[str, Any]:
|
|
active_topics = session.exec(
|
|
select(TopicTopic)
|
|
.where(TopicTopic.bot_id == bot_id)
|
|
.where(TopicTopic.is_active == True)
|
|
.order_by(TopicTopic.topic_key.asc())
|
|
).all()
|
|
if not active_topics:
|
|
return {
|
|
"matched": False,
|
|
"topic_key": None,
|
|
"confidence": 1.0,
|
|
"reason": "no active topic configured",
|
|
}
|
|
|
|
req_key = _normalize_topic_key(requested_topic_key or payload.get("topic_key") or payload.get("topic"))
|
|
if req_key:
|
|
row = _topic_get_row(session, bot_id, req_key)
|
|
if row and bool(row.is_active):
|
|
return {
|
|
"matched": True,
|
|
"topic_key": req_key,
|
|
"confidence": 0.99,
|
|
"reason": "explicit topic key accepted",
|
|
}
|
|
return {
|
|
"matched": False,
|
|
"topic_key": None,
|
|
"confidence": 0.72,
|
|
"reason": f"requested topic {req_key} unavailable or inactive",
|
|
}
|
|
|
|
text = " ".join(
|
|
[
|
|
str(payload.get("title") or "").strip(),
|
|
str(payload.get("content") or payload.get("text") or "").strip(),
|
|
" ".join([str(v or "").strip() for v in (payload.get("tags") or [])]),
|
|
]
|
|
).strip().lower()
|
|
|
|
if not text:
|
|
return {
|
|
"matched": False,
|
|
"topic_key": None,
|
|
"confidence": 1.0,
|
|
"reason": "no routing evidence",
|
|
}
|
|
|
|
best_key = ""
|
|
best_score = -10.0
|
|
best_reason = "no topic matched"
|
|
matched_include = False
|
|
for topic in active_topics:
|
|
key = _normalize_topic_key(topic.topic_key)
|
|
if not key:
|
|
continue
|
|
|
|
routing = _parse_json_dict(topic.routing_json or "{}")
|
|
include_when = _normalize_topic_keywords(routing.get("include_when"))
|
|
exclude_when = _normalize_topic_keywords(routing.get("exclude_when"))
|
|
priority_raw = routing.get("priority", 0)
|
|
try:
|
|
priority = max(0, min(int(priority_raw), 100))
|
|
except Exception:
|
|
priority = 0
|
|
|
|
include_hits = [kw for kw in include_when if kw in text]
|
|
exclude_hits = [kw for kw in exclude_when if kw in text]
|
|
if not include_hits:
|
|
continue
|
|
matched_include = True
|
|
score = float(len(include_hits) * 2 - len(exclude_hits) * 3) + (priority / 1000.0)
|
|
if score > best_score:
|
|
best_score = score
|
|
best_key = key
|
|
if include_hits:
|
|
best_reason = f"matched include_when: {', '.join(include_hits[:3])}"
|
|
elif exclude_hits:
|
|
best_reason = f"matched exclude_when: {', '.join(exclude_hits[:3])}"
|
|
else:
|
|
best_reason = "no include/exclude match, used highest priority active topic"
|
|
|
|
if not matched_include:
|
|
return {
|
|
"matched": False,
|
|
"topic_key": None,
|
|
"confidence": 0.68,
|
|
"reason": "no include_when matched",
|
|
}
|
|
if best_score <= 0:
|
|
return {
|
|
"matched": False,
|
|
"topic_key": None,
|
|
"confidence": 0.68,
|
|
"reason": "no positive routing score",
|
|
}
|
|
confidence = min(0.95, max(0.61, 0.61 + best_score / 12.0))
|
|
return {
|
|
"matched": True,
|
|
"topic_key": best_key,
|
|
"confidence": round(confidence, 3),
|
|
"reason": best_reason,
|
|
}
|
|
|
|
|
|
def _topic_publish_internal(session: Session, bot_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
filter_reason = _topic_filter_reason(payload)
|
|
if filter_reason:
|
|
return {
|
|
"published": False,
|
|
"skipped": True,
|
|
"reason": filter_reason,
|
|
}
|
|
|
|
title = str(payload.get("title") or "").strip()
|
|
content = str(payload.get("content") or payload.get("text") or "").strip()
|
|
if not title and not content:
|
|
return {
|
|
"published": False,
|
|
"skipped": True,
|
|
"reason": "empty title/content",
|
|
}
|
|
|
|
level = str(payload.get("level") or "info").strip().lower()
|
|
if level not in TOPIC_LEVEL_SET:
|
|
level = "info"
|
|
|
|
tags = payload.get("tags")
|
|
tags_rows: List[str] = []
|
|
if isinstance(tags, list):
|
|
for tag in tags:
|
|
text = str(tag or "").strip()
|
|
if text and text not in tags_rows:
|
|
tags_rows.append(text[:64])
|
|
|
|
route_result = _topic_route_pick(session, bot_id, payload, requested_topic_key=str(payload.get("topic_key") or ""))
|
|
if not bool(route_result.get("matched")):
|
|
return {
|
|
"published": False,
|
|
"skipped": True,
|
|
"reason": str(route_result.get("reason") or "no topic matched"),
|
|
"route": route_result,
|
|
}
|
|
topic_key = _normalize_topic_key(route_result.get("topic_key"))
|
|
if not topic_key:
|
|
return {
|
|
"published": False,
|
|
"skipped": True,
|
|
"reason": "invalid topic route result",
|
|
"route": route_result,
|
|
}
|
|
row = _topic_get_row(session, bot_id, topic_key)
|
|
if not row or not bool(row.is_active):
|
|
return {
|
|
"published": False,
|
|
"skipped": True,
|
|
"reason": f"topic {topic_key} unavailable or inactive",
|
|
"route": route_result,
|
|
}
|
|
|
|
dedupe_key = str(payload.get("dedupe_key") or "").strip()
|
|
if dedupe_key:
|
|
existing = session.exec(
|
|
select(TopicItem)
|
|
.where(TopicItem.bot_id == bot_id)
|
|
.where(TopicItem.dedupe_key == dedupe_key)
|
|
.order_by(TopicItem.id.desc())
|
|
.limit(1)
|
|
).first()
|
|
if existing and existing.created_at:
|
|
age_s = (datetime.utcnow() - existing.created_at).total_seconds()
|
|
if age_s <= TOPIC_DEDUPE_WINDOW_SECONDS:
|
|
return {
|
|
"published": False,
|
|
"deduped": True,
|
|
"dedupe_window_seconds": TOPIC_DEDUPE_WINDOW_SECONDS,
|
|
"topic_key": _normalize_topic_key(existing.topic_key),
|
|
"reason": "dedupe_key hit within dedupe window",
|
|
"item": _topic_item_to_dict(existing),
|
|
}
|
|
|
|
view = payload.get("view")
|
|
view_json = json.dumps(view, ensure_ascii=False) if isinstance(view, dict) else None
|
|
source = str(payload.get("source") or "dashboard").strip().lower() or "dashboard"
|
|
now = datetime.utcnow()
|
|
item = TopicItem(
|
|
bot_id=bot_id,
|
|
topic_key=topic_key,
|
|
title=title[:2000],
|
|
content=content[:20000],
|
|
level=level,
|
|
tags_json=json.dumps(tags_rows, ensure_ascii=False) if tags_rows else None,
|
|
view_json=view_json,
|
|
source=source[:64],
|
|
dedupe_key=dedupe_key[:200] if dedupe_key else None,
|
|
is_read=False,
|
|
created_at=now,
|
|
)
|
|
session.add(item)
|
|
session.commit()
|
|
session.refresh(item)
|
|
return {
|
|
"published": True,
|
|
"topic_key": topic_key,
|
|
"item": _topic_item_to_dict(item),
|
|
"route": route_result,
|
|
}
|
|
|
|
|
|
def normalize_topic_key(raw: Any) -> str:
|
|
return _normalize_topic_key(raw)
|
|
|
|
|
|
def list_topics(session: Session, bot_id: str) -> List[Dict[str, Any]]:
|
|
_get_bot_or_404(session, bot_id)
|
|
return _list_topics(session, bot_id)
|
|
|
|
|
|
def create_topic(
|
|
session: Session,
|
|
*,
|
|
bot_id: str,
|
|
topic_key: str,
|
|
name: Optional[str] = None,
|
|
description: Optional[str] = None,
|
|
is_active: bool = True,
|
|
routing: Optional[Dict[str, Any]] = None,
|
|
view_schema: Optional[Dict[str, Any]] = None,
|
|
) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
normalized_key = _normalize_topic_key(topic_key)
|
|
if not normalized_key:
|
|
raise HTTPException(status_code=400, detail="topic_key is required")
|
|
if not TOPIC_KEY_RE.fullmatch(normalized_key):
|
|
raise HTTPException(status_code=400, detail="invalid topic_key")
|
|
exists = _topic_get_row(session, bot_id, normalized_key)
|
|
if exists:
|
|
raise HTTPException(status_code=400, detail=f"Topic already exists: {normalized_key}")
|
|
|
|
now = datetime.utcnow()
|
|
row = TopicTopic(
|
|
bot_id=bot_id,
|
|
topic_key=normalized_key,
|
|
name=str(name or normalized_key).strip() or normalized_key,
|
|
description=str(description or "").strip(),
|
|
is_active=bool(is_active),
|
|
is_default_fallback=False,
|
|
routing_json=json.dumps(routing or {}, ensure_ascii=False),
|
|
view_schema_json=json.dumps(view_schema or {}, ensure_ascii=False),
|
|
created_at=now,
|
|
updated_at=now,
|
|
)
|
|
session.add(row)
|
|
session.commit()
|
|
session.refresh(row)
|
|
return _topic_to_dict(row)
|
|
|
|
|
|
def update_topic(
|
|
session: Session,
|
|
*,
|
|
bot_id: str,
|
|
topic_key: str,
|
|
updates: Dict[str, Any],
|
|
) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
normalized_key = _normalize_topic_key(topic_key)
|
|
if not normalized_key:
|
|
raise HTTPException(status_code=400, detail="topic_key is required")
|
|
row = _topic_get_row(session, bot_id, normalized_key)
|
|
if not row:
|
|
raise HTTPException(status_code=404, detail="Topic not found")
|
|
|
|
if "name" in updates:
|
|
row.name = str(updates.get("name") or "").strip() or row.topic_key
|
|
if "description" in updates:
|
|
row.description = str(updates.get("description") or "").strip()
|
|
if "is_active" in updates:
|
|
row.is_active = bool(updates.get("is_active"))
|
|
if "routing" in updates:
|
|
row.routing_json = json.dumps(updates.get("routing") or {}, ensure_ascii=False)
|
|
if "view_schema" in updates:
|
|
row.view_schema_json = json.dumps(updates.get("view_schema") or {}, ensure_ascii=False)
|
|
row.is_default_fallback = False
|
|
row.updated_at = datetime.utcnow()
|
|
session.add(row)
|
|
session.commit()
|
|
session.refresh(row)
|
|
return _topic_to_dict(row)
|
|
|
|
|
|
def delete_topic(session: Session, *, bot_id: str, topic_key: str) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
normalized_key = _normalize_topic_key(topic_key)
|
|
if not normalized_key:
|
|
raise HTTPException(status_code=400, detail="topic_key is required")
|
|
row = _topic_get_row(session, bot_id, normalized_key)
|
|
if not row:
|
|
raise HTTPException(status_code=404, detail="Topic not found")
|
|
items = session.exec(
|
|
select(TopicItem)
|
|
.where(TopicItem.bot_id == bot_id)
|
|
.where(TopicItem.topic_key == normalized_key)
|
|
).all()
|
|
for item in items:
|
|
session.delete(item)
|
|
session.delete(row)
|
|
session.commit()
|
|
return {"status": "deleted", "bot_id": bot_id, "topic_key": normalized_key}
|
|
|
|
|
|
def _count_topic_items(
|
|
session: Session,
|
|
bot_id: str,
|
|
topic_key: Optional[str] = None,
|
|
unread_only: bool = False,
|
|
) -> int:
|
|
stmt = select(TopicItem).where(TopicItem.bot_id == bot_id)
|
|
normalized_topic_key = _normalize_topic_key(topic_key or "")
|
|
if normalized_topic_key:
|
|
stmt = stmt.where(TopicItem.topic_key == normalized_topic_key)
|
|
rows = session.exec(stmt).all()
|
|
if unread_only:
|
|
return sum(1 for row in rows if not bool(row.is_read))
|
|
return len(rows)
|
|
|
|
|
|
def list_topic_items(
|
|
session: Session,
|
|
*,
|
|
bot_id: str,
|
|
topic_key: Optional[str] = None,
|
|
cursor: Optional[int] = None,
|
|
limit: int = 50,
|
|
) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
normalized_limit = max(1, min(int(limit or 50), 100))
|
|
stmt = select(TopicItem).where(TopicItem.bot_id == bot_id)
|
|
normalized_topic_key = _normalize_topic_key(topic_key or "")
|
|
if normalized_topic_key:
|
|
stmt = stmt.where(TopicItem.topic_key == normalized_topic_key)
|
|
if cursor is not None:
|
|
normalized_cursor = int(cursor)
|
|
if normalized_cursor > 0:
|
|
stmt = stmt.where(TopicItem.id < normalized_cursor)
|
|
rows = session.exec(stmt.order_by(TopicItem.id.desc()).limit(normalized_limit + 1)).all()
|
|
next_cursor: Optional[int] = None
|
|
if len(rows) > normalized_limit:
|
|
next_cursor = rows[-1].id
|
|
rows = rows[:normalized_limit]
|
|
return {
|
|
"bot_id": bot_id,
|
|
"topic_key": normalized_topic_key or None,
|
|
"items": [_topic_item_to_dict(row) for row in rows],
|
|
"next_cursor": next_cursor,
|
|
"unread_count": _count_topic_items(session, bot_id, normalized_topic_key, unread_only=True),
|
|
"total_unread_count": _count_topic_items(session, bot_id, unread_only=True),
|
|
}
|
|
|
|
|
|
def get_topic_item_stats(session: Session, *, bot_id: str) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
latest_item = session.exec(
|
|
select(TopicItem)
|
|
.where(TopicItem.bot_id == bot_id)
|
|
.order_by(TopicItem.id.desc())
|
|
.limit(1)
|
|
).first()
|
|
return {
|
|
"bot_id": bot_id,
|
|
"total_count": _count_topic_items(session, bot_id),
|
|
"unread_count": _count_topic_items(session, bot_id, unread_only=True),
|
|
"latest_item_id": int(latest_item.id or 0) if latest_item and latest_item.id else None,
|
|
}
|
|
|
|
|
|
def mark_topic_item_read(session: Session, *, bot_id: str, item_id: int) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
row = session.exec(
|
|
select(TopicItem)
|
|
.where(TopicItem.bot_id == bot_id)
|
|
.where(TopicItem.id == item_id)
|
|
.limit(1)
|
|
).first()
|
|
if not row:
|
|
raise HTTPException(status_code=404, detail="Topic item not found")
|
|
if not bool(row.is_read):
|
|
row.is_read = True
|
|
session.add(row)
|
|
session.commit()
|
|
session.refresh(row)
|
|
return {
|
|
"status": "updated",
|
|
"bot_id": bot_id,
|
|
"item": _topic_item_to_dict(row),
|
|
}
|
|
|
|
|
|
def delete_topic_item(session: Session, *, bot_id: str, item_id: int) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
row = session.exec(
|
|
select(TopicItem)
|
|
.where(TopicItem.bot_id == bot_id)
|
|
.where(TopicItem.id == item_id)
|
|
.limit(1)
|
|
).first()
|
|
if not row:
|
|
raise HTTPException(status_code=404, detail="Topic item not found")
|
|
payload = _topic_item_to_dict(row)
|
|
session.delete(row)
|
|
session.commit()
|
|
return {
|
|
"status": "deleted",
|
|
"bot_id": bot_id,
|
|
"item": payload,
|
|
}
|
|
|
|
|
|
def publish_topic_item(session: Session, bot_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
_get_bot_or_404(session, bot_id)
|
|
return _topic_publish_internal(session, bot_id, payload)
|