my_meeting/meeting_summary.py

146 lines
5.2 KiB
Python
Raw Permalink Normal View History

2026-05-09 03:23:57 +00:00
import argparse
import json
import time
from pathlib import Path
from agents.chat import get_qwen_response
from prompt_loader import load_prompt
# Filesystem layout; every location is anchored at this module's directory.
PROJECT_ROOT = Path(__file__).resolve().parent

# Transcripts looked up by meeting ID live under data/meetings/<meeting-id>/.
DATA_DIR = PROJECT_ROOT.joinpath("data", "meetings")

# Generated artifacts, split by output format.
RESULTS_MD_DIR = PROJECT_ROOT.joinpath("data", "results", "md")
RESULTS_JSON_DIR = PROJECT_ROOT.joinpath("data", "results", "json")

# Summary templates and their optional, same-named usage guides.
TEMPLATE_DIR = PROJECT_ROOT.joinpath("template")
TEMPLATE_GUIDE_DIR = PROJECT_ROOT.joinpath("template_guides")

# Output directory used when the run has no meeting ID.
EXAMPLES_DIR = PROJECT_ROOT.joinpath("examples")
def parse_args():
    """Parse the command line and return the resulting namespace.

    Exactly one transcript source must be supplied: ``--meeting-id`` or
    ``--input``. An empty string counts as "not supplied". When both or
    neither are given, the problem is reported through ``parser.error``,
    which prints a usage message and terminates the process.
    """
    cli = argparse.ArgumentParser(description="Generate meeting topics and summary.")

    # Registered in this order so the generated --help output is unchanged.
    option_specs = (
        ("--meeting-id", {"help": "Meeting ID under data/meetings"}),
        ("--input", {"help": "Path to a transcript file (.txt or .md)"}),
        ("--template", {"default": "template1.md", "help": "Template file name under template/"}),
        ("--model", {"default": "Qwen3.6-35B", "help": "LLM model name"}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args()

    has_meeting = bool(parsed.meeting_id)
    has_input = bool(parsed.input)
    # XOR is true only when exactly one of the two sources is present.
    if not (has_meeting ^ has_input):
        cli.error("Use exactly one of --meeting-id or --input")
    return parsed
def load_transcript(args) -> tuple[str, str, Path]:
    """Locate and read the transcript selected by the parsed arguments.

    With ``args.meeting_id`` set, the meeting directory under ``DATA_DIR`` is
    searched for ``transcript.txt`` and then ``transcript.md``; the first one
    that exists is used. Otherwise ``args.input`` is resolved to an absolute
    path and read directly.

    Returns:
        A ``(target_name, transcript_text, transcript_path)`` tuple.
        ``target_name`` is the meeting ID, or the input file's stem when
        reading from ``--input``.

    Raises:
        FileNotFoundError: if the meeting directory, a transcript inside it,
            or the explicit input file does not exist.
    """
    meeting_id = args.meeting_id

    if not meeting_id:
        # Explicit file mode.
        source = Path(args.input).resolve()
        if source.exists():
            return source.stem, source.read_text(encoding="utf-8"), source
        raise FileNotFoundError(f"Input file not found: {source}")

    # Meeting-ID mode.
    folder = DATA_DIR / meeting_id
    if not folder.exists():
        raise FileNotFoundError(f"Meeting not found: {meeting_id}")

    # Tuple order sets the preference: .txt wins over .md when both exist.
    candidates = (folder / f"transcript{suffix}" for suffix in (".txt", ".md"))
    found = next((path for path in candidates if path.exists()), None)
    if found is None:
        raise FileNotFoundError(f"No transcript file found for meeting: {meeting_id}")
    return meeting_id, found.read_text(encoding="utf-8"), found
def read_template(template_name: str) -> str:
    """Return the UTF-8 text of ``template_name`` inside ``TEMPLATE_DIR``.

    Raises:
        FileNotFoundError: if no such template exists.
    """
    location = TEMPLATE_DIR / template_name
    if location.exists():
        return location.read_text(encoding="utf-8")
    raise FileNotFoundError(f"Template not found: {template_name}")
2026-05-09 08:52:09 +00:00
def read_template_guide(template_name: str) -> str:
    """Return the usage guide matching ``template_name``, or ``""`` if absent.

    The guide is looked up in ``TEMPLATE_GUIDE_DIR`` under the same file name
    as the template. A missing guide is not an error.
    """
    location = TEMPLATE_GUIDE_DIR / template_name
    return location.read_text(encoding="utf-8") if location.exists() else ""
2026-05-09 03:23:57 +00:00
def collect_stream(response) -> str:
    """Echo a streamed response to stdout and return its answer text.

    ``response`` is iterated as ``(chunk_type, chunk_content)`` pairs. Pairs
    whose content is falsy are skipped. Every other chunk is printed as soon as
    it arrives, preceded by a ``[Thinking]`` or ``[Content]`` banner whenever
    the stream switches section. Chunks typed ``"reasoning"`` are shown only;
    chunks of any other type are treated as answer text and accumulated.

    Returns:
        The concatenation of all non-reasoning chunks, each passed through
        ``str``.
    """
    banners = {"reasoning": "\n[Thinking]\n", "content": "\n[Content]\n"}
    answer_parts = []
    active_section = None

    for kind, text in response:
        if not text:
            continue

        # Anything that is not explicitly reasoning counts as answer content.
        section = "reasoning" if kind == "reasoning" else "content"
        if section != active_section:
            print(banners[section])
            active_section = section

        # Flush on each chunk so the output appears live while streaming.
        print(text, end="", flush=True)
        if section == "content":
            answer_parts.append(str(text))

    print()
    return "".join(answer_parts)
def save_outputs(target_name: str, meeting_id: str | None, sub_topics: str, summary_text: str):
    """Write the topic list and the summary to disk and print where they went.

    With a truthy ``meeting_id`` the files go to per-meeting folders under
    ``RESULTS_JSON_DIR`` and ``RESULTS_MD_DIR``; otherwise both go to
    ``EXAMPLES_DIR``. Destination folders are created as needed.

    Args:
        target_name: Label for the processed transcript; only printed.
        meeting_id: Meeting identifier, or a falsy value when there is none.
        sub_topics: Topic output, expected to be JSON text. If it parses, it
            is re-serialised with two-space indentation and non-ASCII
            characters kept; if not, it is written verbatim.
        summary_text: Summary text, written as ``meeting_summary.md``.
    """
    if meeting_id:
        json_dir, md_dir = RESULTS_JSON_DIR / meeting_id, RESULTS_MD_DIR / meeting_id
    else:
        json_dir = md_dir = EXAMPLES_DIR
    for folder in (json_dir, md_dir):
        folder.mkdir(parents=True, exist_ok=True)

    json_path = json_dir / "sub_topic.json"
    try:
        topics_payload = json.dumps(json.loads(sub_topics), ensure_ascii=False, indent=2)
    except json.JSONDecodeError:
        # Keep the raw output rather than lose it when it is not valid JSON.
        topics_payload = sub_topics
    json_path.write_text(topics_payload, encoding="utf-8")

    summary_path = md_dir / "meeting_summary.md"
    summary_path.write_text(summary_text, encoding="utf-8")

    print(f"\nSaved topics to: {json_path}")
    print(f"Saved summary to: {summary_path}")
    print(f"Processed target: {target_name}")
def main():
    """Run the two-pass summarisation pipeline for one transcript.

    Steps:
      1. Parse arguments, then load the transcript, the template, its
         optional guide, and the ``"meeting_summary"`` prompt set (``"zh"``).
      2. Pass 1: ask the model to extract sub-topics from the transcript.
      3. Pass 2: ask the model for a summary that follows the template, given
         the transcript and the sub-topics from pass 1.
      4. Save both results and print the elapsed time. The timer starts after
         argument parsing.
    """
    args = parse_args()
    started_at = time.perf_counter()

    target_name, transcript, transcript_path = load_transcript(args)
    template = read_template(args.template)
    template_guide = read_template_guide(args.template)
    prompt = load_prompt("meeting_summary", "zh")

    print(f"Processing transcript: {transcript_path}")
    if args.meeting_id:
        print(f"Meeting ID: {args.meeting_id}")

    def run_pass(system_text, user_text):
        """Send one request to the model, echo the stream, return the answer."""
        return collect_stream(get_qwen_response(args.model, system_text, user_text))

    # Pass 1: topic extraction.
    topic_system = prompt["system"]["role"] + prompt["mode_contracts"]["data_preproces"]
    topic_user = prompt["user_template"]["article_preproces"].format(article=transcript)
    sub_topics = run_pass(topic_system, topic_user)

    # Pass 2: template-driven summary.
    summary_system = prompt["system"]["role"] + prompt["mode_contracts"]["data_summary"].format(
        template=template
    )
    if template_guide:
        # The heading below is Chinese for "Template usage notes".
        summary_system += f"\n\n模板使用说明:\n{template_guide}"
    summary_user = prompt["user_template"]["article_summary"].format(
        article=transcript,
        sub_topices=sub_topics,
    )
    summary_text = run_pass(summary_system, summary_user)

    save_outputs(target_name, args.meeting_id, sub_topics, summary_text)

    elapsed = time.perf_counter() - started_at
    print(f"Elapsed: {elapsed:.2f}s")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()