meeting_memory/meeting_state.py

189 lines
6.5 KiB
Python
Raw Permalink Normal View History

2026-05-15 08:39:57 +00:00
import hashlib
import json
import logging
import os
from datetime import datetime
from typing import Dict, List, Optional
# Module-level logger named after this module; used to report state-file load failures.
logger = logging.getLogger(__name__)
def _item_id(task: str, assignee: str) -> str:
    """Return the short identifier used to track an action item.

    The identifier is the first 8 hex characters of the MD5 digest of
    ``"<task>|<assignee>"`` encoded as UTF-8, so the same pair always
    yields the same ID. MD5 is used only to derive a key here.
    """
    key_text = "{}|{}".format(task, assignee)
    digest = hashlib.md5(key_text.encode("utf-8"))
    return digest.hexdigest()[:8]
def _metric_id(metric_name: str, owner: str) -> str:
    """Return the short identifier used to track a metric.

    The identifier is the first 8 hex characters of the MD5 digest of
    ``"<metric_name>|<owner>"`` encoded as UTF-8, so the same pair always
    yields the same ID. MD5 is used only to derive a key here.
    """
    key_text = "{}|{}".format(metric_name, owner)
    digest = hashlib.md5(key_text.encode("utf-8"))
    return digest.hexdigest()[:8]
class MeetingStateStore:
    """Keep cross-meeting tracking state in a single JSON file.

    The state is one dict with four sections, each itself a dict:
    ``action_items``, ``metrics``, ``meeting_series`` and
    ``content_hashes``. All methods work on the in-memory copy; nothing is
    written to disk until :meth:`save` is called.
    """

    # Sections every state dict must contain, in the order a fresh state
    # is created with.
    _SECTIONS = ("action_items", "metrics", "meeting_series", "content_hashes")

    def __init__(self, state_path: str):
        """Remember *state_path* and load the state stored there, if any."""
        self.state_path = state_path
        self._state = self._load()

    def _load(self) -> dict:
        """Read the state file, falling back to an empty state.

        A missing file yields an empty state. A file that cannot be read or
        parsed, or whose top level is not a JSON object, is logged and also
        yields an empty state. Any section missing from the file (or not a
        JSON object) is reset to an empty dict so the other methods can
        index it safely; unrelated keys in the file are kept.
        """
        state: dict = {}
        if os.path.exists(self.state_path):
            try:
                with open(self.state_path, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
                if not isinstance(loaded, dict):
                    raise ValueError(
                        f"expected a JSON object, got {type(loaded).__name__}"
                    )
                state = loaded
            except Exception as e:
                # Deliberately best-effort: a damaged state file must not
                # prevent the store from starting with a clean state.
                logger.warning(f"加载状态文件失败,将创建新状态: {e}")
        for section in self._SECTIONS:
            if not isinstance(state.get(section), dict):
                state[section] = {}
        return state

    def save(self) -> None:
        """Write the in-memory state to ``state_path`` as UTF-8 JSON.

        The parent directory is created when the path has one. The data is
        first written to ``<state_path>.tmp`` and then moved into place with
        ``os.replace``, so a failure while serializing leaves the previous
        state file untouched instead of truncated.
        """
        parent = os.path.dirname(self.state_path)
        # dirname is "" for a bare filename, and os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        tmp_path = f"{self.state_path}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(self._state, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, self.state_path)
        except BaseException:
            # Do not leave a partial temp file behind; re-raise the cause.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise

    def _ensure_series(self, meeting_title: str, meeting_date: str) -> str:
        """Register a meeting under its series and return the series name.

        Creates the series record on first sight, advances ``latest_date``
        when *meeting_date* compares greater, and appends *meeting_title* to
        ``processed_titles`` if it is not already there.

        NOTE(review): dates are compared as plain strings, which presumably
        relies on an ISO-like ``YYYY-MM-DD`` format — confirm with callers.
        """
        series_name = self._detect_series(meeting_title)
        all_series = self._state["meeting_series"]
        series = all_series.get(series_name)
        if not series:
            series = {"latest_date": meeting_date, "processed_titles": []}
            all_series[series_name] = series
        if meeting_date > series.get("latest_date", ""):
            series["latest_date"] = meeting_date
        titles = series.setdefault("processed_titles", [])
        if meeting_title not in titles:
            titles.append(meeting_title)
        return series_name

    def _detect_series(self, title: str) -> str:
        """Derive a series name by stripping per-issue markers from *title*.

        Removed, in order: a ``<4 digits>...期`` marker wrapped in full-width
        or ASCII parentheses, the same marker without parentheses, and a
        ``<4 digits>年第...次`` marker. Leading/trailing ``-``, ``_`` and
        spaces are then trimmed. If nothing is left, *title* itself is
        returned.
        """
        import re

        # One pattern covers both "（2024年第3期）" and "(2024年第3期)".
        cleaned = re.sub(r"[（(]\d{4}\w+期[）)]", "", title)
        cleaned = re.sub(r"\d{4}\w+期", "", cleaned)
        cleaned = re.sub(r"\d{4}年第\w+次", "", cleaned)
        cleaned = cleaned.strip("-_ ")
        return cleaned or title

    def merge_action_items(
        self,
        new_items: List[dict],
        meeting_title: str,
        meeting_date: str,
        meeting_filename: str,
    ) -> List[dict]:
        """Record action items from one meeting and annotate them.

        Each item is keyed by ``_item_id(task, assignee)``. A history entry
        (date, meeting file, status, priority, deadline) is appended to the
        tracked record, which is created on first sight. The input dicts are
        modified in place: ``_item_id`` and ``_history`` are added, and for
        an already-tracked item ``status``, ``priority`` and ``deadline``
        are overwritten with the values of the new history entry (missing
        ones default to ``"待办"`` / ``""`` / ``""``).

        Args:
            new_items: Item dicts; ``None`` is treated as empty.
            meeting_title: Title used to resolve the meeting series.
            meeting_date: Date stored in the history entry.
            meeting_filename: File name stored in the history entry.

        Returns:
            The same item dicts, in input order.
        """
        series_name = self._ensure_series(meeting_title, meeting_date)
        tracked = self._state["action_items"]
        merged = []
        for item in new_items or []:
            task = item.get("task", "")
            assignee = item.get("assignee", "")
            iid = _item_id(task, assignee)
            history_entry = {
                "date": meeting_date,
                "meeting": meeting_filename,
                "status": item.get("status", "待办"),
                "priority": item.get("priority", ""),
                "deadline": item.get("deadline", ""),
            }
            item["_item_id"] = iid
            existing = tracked.get(iid)
            if existing:
                history = existing.setdefault("history", [])
                history.append(history_entry)
                existing["latest"] = history_entry
                # Shallow copy: the list is new, the entries are shared.
                item["_history"] = list(history)
                item["status"] = history_entry["status"]
                item["priority"] = history_entry["priority"]
                item["deadline"] = history_entry["deadline"]
            else:
                tracked[iid] = {
                    "item_id": iid,
                    "task": task,
                    "assignee": assignee,
                    "series": series_name,
                    "created_meeting": meeting_filename,
                    "history": [history_entry],
                    "latest": history_entry,
                }
                item["_history"] = [history_entry]
            merged.append(item)
        return merged

    def merge_metrics(
        self,
        new_metrics: List[dict],
        meeting_title: str,
        meeting_date: str,
        meeting_filename: str,
    ) -> List[dict]:
        """Record metric readings from one meeting and annotate them.

        Each metric is keyed by ``_metric_id(metric_name, owner)``. A
        history entry (date, meeting file, value, target, trend) is appended
        to the tracked record, which is created on first sight. The input
        dicts are modified in place: ``_metric_id`` and ``_history`` are
        added.

        Args:
            new_metrics: Metric dicts; ``None`` is treated as empty.
            meeting_title: Accepted for signature parity with
                :meth:`merge_action_items`; not used here.
            meeting_date: Date stored in the history entry.
            meeting_filename: File name stored in the history entry.

        Returns:
            The same metric dicts, in input order.
        """
        tracked = self._state["metrics"]
        merged = []
        for m in new_metrics or []:
            metric_name = m.get("metric_name", "")
            owner = m.get("owner", "")
            mid = _metric_id(metric_name, owner)
            history_entry = {
                "date": meeting_date,
                "meeting": meeting_filename,
                "value": m.get("value", ""),
                "target": m.get("target", ""),
                "trend": m.get("trend", ""),
            }
            m["_metric_id"] = mid
            existing = tracked.get(mid)
            if existing:
                history = existing.setdefault("history", [])
                history.append(history_entry)
                existing["latest"] = history_entry
                # Shallow copy: the list is new, the entries are shared.
                m["_history"] = list(history)
            else:
                tracked[mid] = {
                    "metric_id": mid,
                    "metric_name": metric_name,
                    "owner": owner,
                    "history": [history_entry],
                    "latest": history_entry,
                }
                m["_history"] = [history_entry]
            merged.append(m)
        return merged

    def get_action_item_history(self, item_id: str) -> Optional[dict]:
        """Return the tracked record for *item_id*, or ``None`` if unknown.

        The returned dict is the stored object itself, not a copy.
        """
        return self._state["action_items"].get(item_id)

    def get_metric_history(self, metric_id: str) -> Optional[dict]:
        """Return the tracked record for *metric_id*, or ``None`` if unknown.

        The returned dict is the stored object itself, not a copy.
        """
        return self._state["metrics"].get(metric_id)

    def get_series_info(self, title: str) -> Optional[dict]:
        """Return the series record that *title* belongs to, or ``None``."""
        series_name = self._detect_series(title)
        return self._state["meeting_series"].get(series_name)

    def has_content_hash(self, content_hash: str) -> bool:
        """Return whether *content_hash* has been registered."""
        return content_hash in self._state["content_hashes"]

    def add_content_hash(
        self, content_hash: str, title: str, date: str, filename: str
    ) -> None:
        """Register *content_hash* with its title, date and file name.

        An existing entry for the same hash is overwritten.
        """
        self._state["content_hashes"][content_hash] = {
            "title": title,
            "date": date,
            "filename": filename,
        }

    def remove_content_hash(self, content_hash: str) -> None:
        """Forget *content_hash*; unknown hashes are ignored."""
        self._state["content_hashes"].pop(content_hash, None)

    def get_stats(self) -> dict:
        """Return the number of entries in each state section."""
        return {
            "action_items_tracked": len(self._state["action_items"]),
            "metrics_tracked": len(self._state["metrics"]),
            "meeting_series": len(self._state["meeting_series"]),
            "content_hashes": len(self._state["content_hashes"]),
        }