// dashboard-nanobot/frontend/src/modules/dashboard/messageParser.ts

import type { BotEvent, ChatMessage } from '../../types/bot';

// CSI escape sequences per ECMA-48: ESC [ followed by parameter bytes (0x30-0x3F),
// intermediate bytes (0x20-0x2F) and one final byte (0x40-0x7E). The parameter class
// covers the whole 0x30-0x3F range so private-marker forms such as "ESC[>4;2m" match too.
const ANSI_RE = /\x1b\[[0-?]*[ -/]*[@-~]/g;
// OSC sequences: ESC ] <payload> terminated by BEL or ST (ESC \). The payload is matched
// lazily so each sequence ends at its own terminator; a greedy payload would run to the
// last terminator in the string and swallow visible text between two OSC sequences
// (e.g. the link text wrapped by a pair of OSC 8 hyperlink sequences).
const OSC_RE = /\x1b\][^\u0007]*?(\u0007|\x1b\\)/g;
// Whitelist filter: matches every character that is NOT a tab, printable ASCII,
// a CJK ideograph in U+4E00-U+9FFF, or one of the listed CJK/typographic punctuation marks.
const NON_TEXT_RE = /[^\u0009\u0020-\u007E\u4E00-\u9FFF。，！？：；、“”‘’（）《》【】—…·\-_./:\\,%+*='"`|<>]/g;
// C0 control characters and DEL, leaving tab (U+0009), LF (U+000A) and CR (U+000D) untouched.
const CONTROL_RE = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g;
// "[Attached Files] ... [/Attached Files]" blocks; case-insensitive, spans newlines,
// lazy so that separate blocks are matched individually.
const ATTACHMENT_BLOCK_RE = /\[Attached Files\][\s\S]*?\[\/Attached Files\]/gi;

/**
 * Reduces one raw log line to plain single-spaced text.
 *
 * Steps, in order: drop OSC and CSI escape sequences, drop bracketed fragments
 * that look like escape-sequence tails, replace every character outside the
 * NON_TEXT_RE whitelist with a space, collapse whitespace runs and trim.
 */
function cleanLine(line: string) {
  // Full escape sequences first, while their ESC introducer is still in the string.
  const withoutEscapes = line.replace(OSC_RE, '').replace(ANSI_RE, '');

  // Bracketed leftovers such as "[0m", "[?25l", "[2K" or "[K"
  // (presumably CSI remnants whose ESC byte did not survive transport).
  const withoutFragments = withoutEscapes
    .replace(/\[(\?|\d|;)+[A-Za-z]/g, '')
    .replace(/\[(\d+)?K/g, '');

  // Anything not whitelisted becomes a space so neighbouring words stay separated.
  const whitelisted = withoutFragments.replace(NON_TEXT_RE, ' ');

  return whitelisted.replace(/\s+/g, ' ').trim();
}

/**
 * Normalizes user-authored message text for display/editing.
 *
 * - CRLF becomes LF and the text is trimmed.
 * - "[Attached Files] ... [/Attached Files]" blocks are removed.
 * - Line-leading role prefixes ("You:", "User:", "你：") are stripped.
 * - Runs of three or more newlines are reduced to two.
 * - If the whitespace-flattened text is the same phrase (4+ characters) echoed
 *   twice, optionally separated by a role prefix, only one copy is returned
 *   (in its flattened form).
 *
 * @param input Raw user text; falsy input yields an empty string.
 * @returns The normalized text, or '' when the input is empty after trimming.
 */
export function normalizeUserMessageText(input: string) {
  const unified = (input || '').replace(/\r\n/g, '\n').trim();
  if (unified.length === 0) return '';

  // The attachment list must not show up in the editable/visible command text.
  const withoutAttachments = unified.replace(ATTACHMENT_BLOCK_RE, '').trim();

  // Some gateways inject a role prefix at the start of a line, e.g. "You: ...".
  const withoutRolePrefix = withoutAttachments
    .replace(/(^|\n)\s*(you|user|你)\s*[:：]\s*/gi, '$1')
    .trim();

  const text = withoutRolePrefix.replace(/\n{3,}/g, '\n\n');

  // Echo detection works on a single-line view: "xxx You: xxx" first, then "xxx xxx".
  const flat = text.replace(/\s+/g, ' ').trim();
  const echoPatterns = [
    /^(.{4,}?)\s+(you|user|你)\s*[:：]\s*\1$/iu,
    /^(.{4,}?)\s+\1$/u,
  ];
  for (const pattern of echoPatterns) {
    const hit = pattern.exec(flat);
    if (hit) return hit[1].trim();
  }

  return text;
}

/**
 * Normalizes assistant output for rendering.
 *
 * Removes OSC/CSI escape sequences and control characters, converts CRLF and
 * lone CR to LF, strips the literal dashboard marker tokens, and caps runs of
 * blank lines (four or more newlines become three).
 *
 * @param input Raw assistant text; falsy input yields an empty string.
 * @returns The cleaned text, or '' when nothing remains after the first trim.
 */
export function normalizeAssistantMessageText(input: string) {
  const source = String(input || '');

  const withoutTerminalNoise = source
    .replace(OSC_RE, '')
    .replace(ANSI_RE, '')
    .replace(CONTROL_RE, '');

  const unixNewlines = withoutTerminalNoise
    .replace(/\r\n/g, '\n')
    .replace(/\r/g, '\n')
    .trim();
  if (unixNewlines === '') return '';

  // A channel may accidentally emit the raw dashboard wrapper markers; drop them.
  const withoutMarkers = unixNewlines
    .replace(/__DASHBOARD_DATA_START__/g, '')
    .replace(/__DASHBOARD_DATA_END__/g, '')
    .trim();

  // Keep some vertical spacing for markdown readability, but not unbounded gaps.
  return withoutMarkers.replace(/\n{4,}/g, '\n\n\n');
}

/**
 * Builds a short one-line progress label from assistant text.
 *
 * Takes the first non-empty line of the normalized text, replaces the markdown
 * punctuation characters ` * _ > # | [ ] ( ) with spaces, collapses whitespace,
 * and truncates to at most 96 UTF-16 code units followed by "...".
 *
 * @param input Raw assistant/progress text.
 * @param isZh  Selects the Chinese placeholder instead of the English one.
 * @returns The summary line, or a localized "Processing..." placeholder when
 *          no visible text remains.
 */
export function summarizeProgressText(input: string, isZh: boolean): string {
  const placeholder = isZh ? '处理中...' : 'Processing...';
  const maxUnits = 96;

  const raw = normalizeAssistantMessageText(input);
  if (!raw) return placeholder;

  const firstLine = raw
    .split('\n')
    .map((v) => v.trim())
    .find((v) => v.length > 0);
  const line = (firstLine ?? raw)
    .replace(/[`*_>#|\[\]\(\)]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
  if (!line) return placeholder;
  if (line.length <= maxUnits) return line;

  // Do not cut a surrogate pair in half: if the last kept code unit is a high
  // surrogate, its low surrogate would be sliced off, leaving an unpaired
  // (unrenderable) code unit in front of the ellipsis. Back off by one unit.
  let end = maxUnits;
  const lastUnit = line.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return `${line.slice(0, end)}...`;
}

/**
 * Core rule: the log parser is only used to extract "status events" (which
 * drive the thinking bubble). Every real conversation bubble (user commands,
 * AI replies) must be driven by structured bus messages instead.
 *
 * @param raw A raw log line; it is cleaned with `cleanLine` before matching.
 * @param ts  Timestamp attached to the produced event; defaults to `Date.now()`.
 * @returns `{ event }` for a recognized thinking / tool-call / error line,
 *          otherwise `null`. The `message` field is never populated here.
 */
export function parseLogToArtifacts(
  raw: string,
  ts: number = Date.now(),
): { message?: ChatMessage; event?: BotEvent } | null {
  const line = cleanLine(raw);
  if (line.length < 3) return null;
  const lower = line.toLowerCase();

  // 1. Ignore structured markers, system logs and heartbeat noise.
  const noiseMarkers = [
    '__dashboard_data',
    'litellm',
    'heartbeat',
    'starting nanobot gateway',
  ];
  if (noiseMarkers.some((marker) => lower.includes(marker))) {
    return null;
  }

  // 2. Extract only thinking / tool-execution status.
  if (lower.includes('nanobot is thinking')) {
    return { event: { state: 'THINKING', text: 'Thinking', ts } };
  }

  const toolHit = /execut(?:e|ing) tool[:\s]+([\w\-./]+)/i.exec(line);
  if (toolHit) {
    return { event: { state: 'TOOL_CALL', text: `Executing Tool: ${toolHit[1]}`, ts } };
  }

  // 3. Extract error status; an "error" mention counts unless the line says "no error".
  const hasTraceback = lower.includes('traceback');
  const hasRealError = lower.includes('error') && !lower.includes('no error');
  if (hasTraceback || hasRealError) {
    return { event: { state: 'ERROR', text: 'Execution Error', ts } };
  }

  // Never return a message object.
  return null;
}