// imeeting/frontend/src/pages/business/RealtimeAsrSession.tsx
import { useEffect, useMemo, useRef, useState } from "react";
import {
Alert,
Badge,
Button,
Card,
Col,
Empty,
Row,
Space,
Statistic,
Tag,
Typography,
message,
} from "antd";
import {
AudioOutlined,
ClockCircleOutlined,
PauseCircleOutlined,
PlayCircleOutlined,
SoundOutlined,
SyncOutlined,
} from "@ant-design/icons";
import { useNavigate, useParams } from "react-router-dom";
import dayjs from "dayjs";
import PageHeader from "../../components/shared/PageHeader";
import {
appendRealtimeTranscripts,
completeRealtimeMeeting,
getMeetingDetail,
getTranscripts,
uploadAudio,
type MeetingTranscriptVO,
type MeetingVO,
type RealtimeTranscriptItemDTO,
} from "../../api/business/meeting";
const { Text, Title } = Typography;
// Audio capture rate expected by the ASR backend (Hz).
const SAMPLE_RATE = 16000;
// Float samples per WebSocket frame: 1280 / 16000 = 80 ms of audio per send.
const CHUNK_SIZE = 1280;
// Speaker field as delivered by the ASR WebSocket: either a bare display-name
// string, or an object carrying an optional name and/or user id.
type WsSpeaker = string | { name?: string; user_id?: string | number } | undefined;
// One JSON payload from the ASR WebSocket.
type WsMessage = {
  text?: string;
  // true => segment is finalized and should be persisted; false/absent => partial.
  is_final?: boolean;
  speaker?: WsSpeaker;
  // Pairs of [start, end] per recognized token — presumably milliseconds
  // (first pair's start / last pair's end are used as segment bounds); confirm with backend.
  timestamp?: number[][];
};
// One rendered transcript bubble in the UI.
type TranscriptCard = {
  id: string;
  speakerName: string;
  userId?: string | number;
  text: string;
  // Segment bounds in ms (see WsMessage.timestamp); optional for legacy rows.
  startTime?: number;
  endTime?: number;
  // Finalized rows come from the server or an is_final WS message.
  final: boolean;
};
// Launch configuration stashed in sessionStorage by the meeting-create page.
type RealtimeMeetingSessionDraft = {
  meetingId: number;
  meetingTitle: string;
  asrModelName: string;
  summaryModelName: string;
  // WebSocket endpoint of the realtime recognizer.
  wsUrl: string;
  // Recognition mode forwarded in the WS handshake (defaults to "2pass").
  mode: string;
  // Non-zero enables speaker diarization on the backend.
  useSpkId: number;
  hotwords: Array<{ hotword: string; weight: number }>;
};
/** sessionStorage key under which the launch draft for a meeting is stored. */
function getSessionKey(meetingId: number) {
  return "realtimeMeetingSession:" + String(meetingId);
}
/**
 * Packages float samples (range [-1, 1]) into a mono 16-bit PCM WAV blob.
 * Writes a standard 44-byte RIFF/WAVE header followed by little-endian PCM.
 */
function buildWavBlob(samples: number[], sampleRate: number) {
  const dataBytes = samples.length * 2;
  const buffer = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(buffer);
  const putAscii = (offset: number, text: string) => {
    for (let i = 0; i < text.length; i += 1) {
      view.setUint8(offset + i, text.charCodeAt(i));
    }
  };
  // RIFF container header.
  putAscii(0, "RIFF");
  view.setUint32(4, 36 + dataBytes, true);
  putAscii(8, "WAVE");
  // fmt chunk: PCM (1), mono (1), 16 bits/sample.
  putAscii(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, 1, true);
  view.setUint32(24, sampleRate, true);
  // Byte rate = sampleRate * channels * bytesPerSample = sampleRate * 2.
  view.setUint32(28, sampleRate * 2, true);
  // Block align = channels * bytesPerSample.
  view.setUint16(32, 2, true);
  view.setUint16(34, 16, true);
  // data chunk, then the clamped/scaled PCM samples.
  putAscii(36, "data");
  view.setUint32(40, dataBytes, true);
  let offset = 44;
  for (const sample of samples) {
    const clamped = Math.max(-1, Math.min(1, sample));
    view.setInt16(offset, clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff, true);
    offset += 2;
  }
  return new Blob([buffer], { type: "audio/wav" });
}
/**
 * Converts float samples (range [-1, 1]) into a little-endian 16-bit PCM
 * ArrayBuffer, clamping out-of-range values. Used for WebSocket audio frames.
 */
function floatTo16BitPCM(input: Float32Array) {
  const out = new DataView(new ArrayBuffer(input.length * 2));
  input.forEach((sample, index) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    // Negative side scales by 0x8000 (min -32768), positive by 0x7fff (max 32767).
    out.setInt16(index * 2, clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff, true);
  });
  return out.buffer;
}
/**
 * Normalizes the WebSocket speaker field into { speakerId, speakerName, userId }.
 *
 * Fix: the original used a truthiness check on `user_id`, so a legitimate id of
 * `0` (or empty string) was treated as absent and mapped to "spk_0"/"Unknown".
 * An explicit null/undefined check preserves falsy-but-present ids.
 */
function resolveSpeaker(speaker?: WsSpeaker) {
  if (!speaker) {
    return { speakerId: "spk_0", speakerName: "Unknown", userId: undefined };
  }
  if (typeof speaker === "string") {
    return { speakerId: speaker, speakerName: speaker, userId: undefined };
  }
  const hasUserId = speaker.user_id !== undefined && speaker.user_id !== null;
  return {
    speakerId: hasUserId ? String(speaker.user_id) : "spk_0",
    // Prefer the display name; fall back to the stringified id, then "Unknown".
    speakerName: speaker.name || (hasUserId ? String(speaker.user_id) : "Unknown"),
    userId: speaker.user_id,
  };
}
/**
 * Formats an elapsed duration in whole seconds as "MM:SS", or "HH:MM:SS"
 * once the duration reaches one hour.
 */
function formatClock(totalSeconds: number) {
  const pad = (value: number) => value.toString().padStart(2, "0");
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;
  const minutesSeconds = `${pad(minutes)}:${pad(seconds)}`;
  return hours > 0 ? `${pad(hours)}:${minutesSeconds}` : minutesSeconds;
}
/**
 * Formats a transcript timestamp in milliseconds as "MM:SS";
 * returns the "--:--" placeholder when no timestamp is available.
 */
function formatTranscriptTime(ms?: number) {
  // `== null` matches both undefined and null, as the original checks did.
  if (ms == null) {
    return "--:--";
  }
  const totalSeconds = Math.floor(ms / 1000);
  const pad = (value: number) => value.toString().padStart(2, "0");
  return `${pad(Math.floor(totalSeconds / 60))}:${pad(totalSeconds % 60)}`;
}
/**
 * Live meeting page: streams microphone audio to a realtime ASR WebSocket,
 * renders partial and finalized transcripts, and finalizes the meeting
 * (WAV upload + summary trigger) when the user stops or the page is closed.
 *
 * NOTE(review): several JSX labels (buttons, titles, config-row captions)
 * appear to have lost their text, likely during extraction — restore against
 * the original source before shipping.
 */
export default function RealtimeAsrSession() {
  const navigate = useNavigate();
  const { id } = useParams<{ id: string }>();
  const meetingId = Number(id);

  const [meeting, setMeeting] = useState<MeetingVO | null>(null);
  // Launch config written by the create page; null when opened in a fresh browser.
  const [sessionDraft, setSessionDraft] = useState<RealtimeMeetingSessionDraft | null>(null);
  const [loading, setLoading] = useState(true);
  const [recording, setRecording] = useState(false);
  const [connecting, setConnecting] = useState(false);
  const [finishing, setFinishing] = useState(false);
  const [statusText, setStatusText] = useState("待开始");
  // Current non-final (streaming) segment text and its speaker.
  const [streamingText, setStreamingText] = useState("");
  const [streamingSpeaker, setStreamingSpeaker] = useState("Unknown");
  const [transcripts, setTranscripts] = useState<TranscriptCard[]>([]);
  // 0-100 mic level driving the gradient meter in the sidebar.
  const [audioLevel, setAudioLevel] = useState(0);
  const [elapsedSeconds, setElapsedSeconds] = useState(0);

  // Scrollable transcript container, auto-scrolled to bottom on new text.
  const transcriptRef = useRef<HTMLDivElement | null>(null);
  // Live WebSocket / WebAudio handles kept in refs so audio callbacks and
  // cleanup see the latest values without re-binding.
  const wsRef = useRef<WebSocket | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const audioSourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  // Samples not yet flushed to the socket; drained in CHUNK_SIZE slices.
  const audioBufferRef = useRef<number[]>([]);
  // Entire session's samples, used to build the WAV uploaded at completion.
  const recordedSamplesRef = useRef<number[]>([]);
  // Guards against completing the meeting twice (stop button vs. pagehide).
  const completeOnceRef = useRef(false);
  const startedAtRef = useRef<number | null>(null);

  const finalTranscriptCount = transcripts.length;
  // Total rendered characters, including the in-flight streaming segment.
  const totalTranscriptChars = useMemo(
    () => transcripts.reduce((sum, item) => sum + item.text.length, 0) + streamingText.length,
    [streamingText, transcripts],
  );
  const statusColor = recording ? "#1677ff" : connecting || finishing ? "#faad14" : "#94a3b8";

  // Initial load: restore the launch draft from sessionStorage and fetch the
  // meeting detail plus any transcripts already persisted server-side.
  useEffect(() => {
    if (!meetingId || Number.isNaN(meetingId)) {
      return;
    }
    const loadData = async () => {
      setLoading(true);
      try {
        const stored = sessionStorage.getItem(getSessionKey(meetingId));
        setSessionDraft(stored ? JSON.parse(stored) : null);
        const [detailRes, transcriptRes] = await Promise.all([getMeetingDetail(meetingId), getTranscripts(meetingId)]);
        setMeeting(detailRes.data.data);
        setTranscripts(
          (transcriptRes.data.data || []).map((item: MeetingTranscriptVO) => ({
            id: String(item.id),
            speakerName: item.speakerName || item.speakerId || "发言人",
            text: item.content,
            startTime: item.startTime,
            endTime: item.endTime,
            final: true,
          })),
        );
      } catch {
        message.error("加载实时会议失败");
      } finally {
        setLoading(false);
      }
    };
    void loadData();
  }, [meetingId]);

  // Elapsed-time ticker: derives seconds from the wall-clock start so drift
  // from interval jitter does not accumulate.
  useEffect(() => {
    if (!recording) {
      setElapsedSeconds(0);
      return;
    }
    const timer = window.setInterval(() => {
      if (startedAtRef.current) {
        setElapsedSeconds(Math.floor((Date.now() - startedAtRef.current) / 1000));
      }
    }, 1000);
    return () => window.clearInterval(timer);
  }, [recording]);

  // Keep the transcript list scrolled to the newest entry.
  useEffect(() => {
    if (!transcriptRef.current) {
      return;
    }
    transcriptRef.current.scrollTop = transcriptRef.current.scrollHeight;
  }, [streamingText, transcripts]);

  // Crash/close protection: on pagehide, tell the recognizer we stopped
  // speaking and fire a keepalive completion request so the meeting still
  // gets summarized. Best-effort — errors are deliberately swallowed.
  useEffect(() => {
    const handlePageHide = () => {
      if (!meetingId || completeOnceRef.current) {
        return;
      }
      const token = localStorage.getItem("accessToken");
      completeOnceRef.current = true;
      if (wsRef.current?.readyState === WebSocket.OPEN) {
        wsRef.current.send(JSON.stringify({ is_speaking: false }));
      }
      fetch(`/api/biz/meeting/${meetingId}/realtime/complete`, {
        method: "POST",
        keepalive: true,
        headers: {
          "Content-Type": "application/json",
          ...(token ? { Authorization: `Bearer ${token}` } : {}),
        },
        body: JSON.stringify({}),
      }).catch(() => undefined);
    };
    window.addEventListener("pagehide", handlePageHide);
    return () => window.removeEventListener("pagehide", handlePageHide);
  }, [meetingId]);

  // Tears down the WebAudio graph and mic stream, resets all audio refs, and
  // returns the full-session recording as a WAV blob (null if nothing captured).
  const shutdownAudioPipeline = async () => {
    processorRef.current?.disconnect();
    audioSourceRef.current?.disconnect();
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => track.stop());
    }
    if (audioContextRef.current && audioContextRef.current.state !== "closed") {
      await audioContextRef.current.close();
    }
    streamRef.current = null;
    processorRef.current = null;
    audioSourceRef.current = null;
    audioContextRef.current = null;
    audioBufferRef.current = [];
    // Grab the samples before clearing so the ref is reset even if the
    // caller drops the returned blob.
    const recordedSamples = recordedSamplesRef.current;
    recordedSamplesRef.current = [];
    setAudioLevel(0);
    return recordedSamples.length > 0 ? buildWavBlob(recordedSamples, SAMPLE_RATE) : null;
  };

  // Opens the mic at SAMPLE_RATE mono and wires a ScriptProcessorNode that:
  // tracks peak amplitude for the level meter, appends samples to both the
  // send buffer and the session recording, and flushes CHUNK_SIZE slices of
  // 16-bit PCM to the WebSocket while it is open.
  const startAudioPipeline = async () => {
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true,
      },
    });
    const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
    const source = audioContext.createMediaStreamSource(stream);
    // NOTE(review): ScriptProcessorNode is deprecated; consider AudioWorklet.
    const processor = audioContext.createScriptProcessor(4096, 1, 1);
    streamRef.current = stream;
    audioContextRef.current = audioContext;
    audioSourceRef.current = source;
    processorRef.current = processor;
    recordedSamplesRef.current = [];
    processor.onaudioprocess = (event) => {
      const input = event.inputBuffer.getChannelData(0);
      let maxAmplitude = 0;
      for (let i = 0; i < input.length; i += 1) {
        const amplitude = Math.abs(input[i]);
        if (amplitude > maxAmplitude) {
          maxAmplitude = amplitude;
        }
        audioBufferRef.current.push(input[i]);
        recordedSamplesRef.current.push(input[i]);
      }
      // 180 is an empirical gain so normal speech fills most of the meter.
      setAudioLevel(Math.min(100, Math.round(maxAmplitude * 180)));
      while (audioBufferRef.current.length >= CHUNK_SIZE) {
        const chunk = audioBufferRef.current.slice(0, CHUNK_SIZE);
        audioBufferRef.current = audioBufferRef.current.slice(CHUNK_SIZE);
        if (wsRef.current?.readyState === WebSocket.OPEN) {
          wsRef.current.send(floatTo16BitPCM(new Float32Array(chunk)));
        }
      }
    };
    source.connect(processor);
    // Connecting to destination is required for onaudioprocess to fire.
    processor.connect(audioContext.destination);
  };

  // Persists one finalized WS segment to the backend immediately, so content
  // survives even if the session ends abnormally.
  const saveFinalTranscript = async (msg: WsMessage) => {
    if (!msg.text || !meetingId) {
      return;
    }
    const speaker = resolveSpeaker(msg.speaker);
    const item: RealtimeTranscriptItemDTO = {
      speakerId: speaker.speakerId,
      speakerName: speaker.speakerName,
      content: msg.text,
      startTime: msg.timestamp?.[0]?.[0],
      endTime: msg.timestamp?.[msg.timestamp.length - 1]?.[1],
    };
    await appendRealtimeTranscripts(meetingId, [item]);
  };

  // Connects to the recognizer, sends the handshake config, then starts the
  // mic pipeline. Final messages append a transcript card and are persisted;
  // partial messages only update the streaming preview.
  const handleStart = async () => {
    if (!sessionDraft?.wsUrl) {
      message.error("未找到实时识别配置,请返回创建页重新进入");
      return;
    }
    if (recording || connecting) {
      return;
    }
    setConnecting(true);
    setStatusText("连接识别服务...");
    try {
      const socket = new WebSocket(sessionDraft.wsUrl);
      socket.binaryType = "arraybuffer";
      wsRef.current = socket;
      socket.onopen = async () => {
        socket.send(JSON.stringify({
          mode: sessionDraft.mode || "2pass",
          chunk_size: [0, 8, 4],
          chunk_interval: 4,
          wav_name: `meeting_${meetingId}`,
          is_speaking: true,
          speaker_name: null,
          use_spk_id: sessionDraft.useSpkId,
          save_audio: false,
          hotwords: sessionDraft.hotwords,
        }));
        await startAudioPipeline();
        startedAtRef.current = Date.now();
        setConnecting(false);
        setRecording(true);
        setStatusText("实时识别中");
      };
      socket.onmessage = (event) => {
        try {
          const payload = JSON.parse(event.data) as WsMessage;
          if (!payload.text) {
            return;
          }
          const speaker = resolveSpeaker(payload.speaker);
          if (payload.is_final) {
            setTranscripts((prev) => [
              ...prev,
              {
                id: `${Date.now()}-${Math.random()}`,
                speakerName: speaker.speakerName,
                userId: speaker.userId,
                text: payload.text,
                startTime: payload.timestamp?.[0]?.[0],
                endTime: payload.timestamp?.[payload.timestamp.length - 1]?.[1],
                final: true,
              },
            ]);
            setStreamingText("");
            setStreamingSpeaker("Unknown");
            // Fire-and-forget: persistence failure must not break the UI flow.
            void saveFinalTranscript(payload);
          } else {
            setStreamingText(payload.text);
            setStreamingSpeaker(speaker.speakerName);
          }
        } catch {
          // ignore invalid payload
        }
      };
      socket.onerror = () => {
        setConnecting(false);
        setRecording(false);
        setStatusText("连接失败");
        message.error("实时识别 WebSocket 连接失败");
      };
      // NOTE(review): onclose does not shut down the audio pipeline, so the
      // mic keeps capturing if the server drops the connection — verify intent.
      socket.onclose = () => {
        setConnecting(false);
        setRecording(false);
      };
    } catch {
      setConnecting(false);
      setStatusText("启动失败");
      message.error("启动实时识别失败");
    }
  };

  // Stops the session: signals end-of-speech, closes the socket, tears down
  // audio, uploads the recorded WAV (best-effort), then completes the meeting
  // to trigger summarization. On completion failure the once-guard is reset
  // so the user can retry.
  const handleStop = async (navigateAfterStop = true) => {
    if (!meetingId || completeOnceRef.current) {
      return;
    }
    completeOnceRef.current = true;
    setFinishing(true);
    setStatusText("结束会议中...");
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ is_speaking: false }));
    }
    wsRef.current?.close();
    wsRef.current = null;
    const audioBlob = await shutdownAudioPipeline();
    let uploadedAudioUrl: string | undefined;
    if (audioBlob) {
      try {
        const file = new File([audioBlob], `meeting-${meetingId}.wav`, { type: audioBlob.type || "audio/wav" });
        const uploadRes = await uploadAudio(file);
        uploadedAudioUrl = uploadRes.data.data;
      } catch {
        // Audio upload is optional — transcripts were persisted incrementally.
        message.warning("会议音频上传失败,已保留转录内容");
      }
    }
    try {
      await completeRealtimeMeeting(meetingId, uploadedAudioUrl ? { audioUrl: uploadedAudioUrl } : {});
      sessionStorage.removeItem(getSessionKey(meetingId));
      setStatusText("已提交总结任务");
      message.success("实时会议已结束,正在生成总结");
      if (navigateAfterStop) {
        navigate(`/meetings/${meetingId}`);
      }
    } catch {
      // Allow a retry of the stop flow if completion failed.
      completeOnceRef.current = false;
      setStatusText("结束失败");
    } finally {
      setRecording(false);
      setFinishing(false);
      startedAtRef.current = null;
    }
  };

  // Initial-load spinner.
  if (loading) {
    return (
      <div style={{ padding: 24 }}>
        <Card bordered={false} style={{ borderRadius: 18 }}>
          <div style={{ textAlign: "center", padding: "96px 0" }}>
            <SyncOutlined spin />
          </div>
        </Card>
      </div>
    );
  }
  // Meeting id did not resolve to a meeting.
  if (!meeting) {
    return (
      <div style={{ padding: 24 }}>
        <Card bordered={false} style={{ borderRadius: 18 }}>
          <Empty description="会议不存在" />
        </Card>
      </div>
    );
  }
  // Layout: header, then either a "missing draft" warning or a two-column
  // body (control/stats sidebar on the left, transcript stream on the right).
  return (
    <div style={{ height: "100%", display: "flex", flexDirection: "column", overflow: "hidden" }}>
      <PageHeader
        title={meeting.title || "实时识别中"}
        subtitle={`会议编号 #${meeting.id} · ${dayjs(meeting.meetingTime).format("YYYY-MM-DD HH:mm")}`}
        extra={<Badge color={statusColor} text={statusText} />}
      />
      <div style={{ flex: 1, minHeight: 0, overflow: "hidden" }}>
        {!sessionDraft ? (
          <Card bordered={false} style={{ borderRadius: 18 }}>
            <Alert
              type="warning"
              showIcon
              message="缺少实时识别启动配置"
              description="这个会议的实时会中配置没有保存在当前浏览器中,请返回创建页重新进入。"
              action={<Button size="small" onClick={() => navigate("/meeting-live-create")}></Button>}
            />
          </Card>
        ) : (
          <Row gutter={16} style={{ height: "100%" }}>
            <Col xs={24} xl={7} style={{ height: "100%" }}>
              <Card
                bordered={false}
                style={{ height: "100%", borderRadius: 18, boxShadow: "0 8px 22px rgba(15,23,42,0.05)" }}
                bodyStyle={{ height: "100%", padding: 16, display: "flex", flexDirection: "column" }}
              >
                <Space direction="vertical" size={16} style={{ width: "100%" }}>
                  <div style={{ padding: 14, borderRadius: 16, background: "linear-gradient(135deg, #0f172a 0%, #1e40af 60%, #60a5fa 100%)", color: "#fff" }}>
                    <Space direction="vertical" size={8}>
                      <Tag color="blue" style={{ width: "fit-content", margin: 0 }}>LIVE SESSION</Tag>
                      <Title level={4} style={{ color: "#fff", margin: 0 }}></Title>
                      <Text style={{ color: "rgba(255,255,255,0.82)" }}></Text>
                    </Space>
                  </div>
                  <Space style={{ width: "100%" }}>
                    <Button type="primary" icon={<PlayCircleOutlined />} disabled={recording || connecting || finishing} loading={connecting} onClick={() => void handleStart()} style={{ flex: 1, height: 42 }}>
                    </Button>
                    <Button danger icon={<PauseCircleOutlined />} disabled={(!recording && !connecting) || finishing} loading={finishing} onClick={() => void handleStop(true)} style={{ flex: 1, height: 42 }}>
                    </Button>
                  </Space>
                  <Row gutter={[12, 12]}>
                    <Col span={12}><Statistic title="已识别片段" value={finalTranscriptCount} /></Col>
                    <Col span={12}><Statistic title="实时字数" value={totalTranscriptChars} /></Col>
                    <Col span={12}><Statistic title="已录时长" value={formatClock(elapsedSeconds)} prefix={<ClockCircleOutlined />} /></Col>
                    <Col span={12}><Statistic title="说话人区分" value={sessionDraft.useSpkId ? "开启" : "关闭"} /></Col>
                  </Row>
                </Space>
                <div style={{ marginTop: 12, padding: 14, borderRadius: 14, background: "#fafcff", border: "1px solid #edf2ff" }}>
                  <Space direction="vertical" size={10} style={{ width: "100%" }}>
                    <div style={{ display: "flex", justifyContent: "space-between" }}><Text type="secondary">ASR </Text><Text strong>{sessionDraft.asrModelName}</Text></div>
                    <div style={{ display: "flex", justifyContent: "space-between" }}><Text type="secondary"></Text><Text strong>{sessionDraft.summaryModelName}</Text></div>
                    <div style={{ display: "flex", justifyContent: "space-between" }}><Text type="secondary"></Text><Text strong>{sessionDraft.mode}</Text></div>
                    <div style={{ display: "flex", justifyContent: "space-between" }}><Text type="secondary"></Text><Text strong>{sessionDraft.hotwords.length}</Text></div>
                    <div>
                      <Text type="secondary"></Text>
                      <div style={{ marginTop: 8, height: 10, borderRadius: 999, background: "#e2e8f0", overflow: "hidden" }}>
                        <div style={{ width: `${audioLevel}%`, height: "100%", background: "linear-gradient(90deg, #38bdf8, #2563eb)" }} />
                      </div>
                    </div>
                  </Space>
                </div>
                <div style={{ marginTop: "auto" }}>
                  <Alert type="info" showIcon message="异常关闭保护" description="最终转录会实时写入会议;页面关闭时会自动尝试结束会议并触发总结,避免会中内容整体丢失。" />
                </div>
              </Card>
            </Col>
            <Col xs={24} xl={17} style={{ height: "100%" }}>
              <Card bordered={false} style={{ borderRadius: 18, boxShadow: "0 8px 22px rgba(15,23,42,0.05)", height: "100%" }} bodyStyle={{ padding: 0, height: "100%", display: "flex", flexDirection: "column" }}>
                <div style={{ padding: "16px 20px", borderBottom: "1px solid #f0f0f0", display: "flex", alignItems: "center", justifyContent: "space-between", gap: 12, flexShrink: 0 }}>
                  <div>
                    <Title level={4} style={{ margin: 0 }}></Title>
                    <Text type="secondary">稿</Text>
                  </div>
                  <Space wrap>
                    <Tag icon={<SoundOutlined />} color={recording ? "processing" : "default"}>{recording ? "采集中" : connecting ? "连接中" : "待命"}</Tag>
                    <Tag color="blue">{sessionDraft.asrModelName}</Tag>
                  </Space>
                </div>
                <div ref={transcriptRef} style={{ flex: 1, minHeight: 0, overflowY: "auto", padding: 18, background: "linear-gradient(180deg, #f8fafc 0%, #ffffff 65%, #f8fafc 100%)" }}>
                  {transcripts.length === 0 && !streamingText ? (
                    <div style={{ height: "100%", display: "flex", alignItems: "center", justifyContent: "center" }}>
                      <Empty description="会议已创建,点击左侧开始识别即可进入转写" />
                    </div>
                  ) : (
                    <Space direction="vertical" size={12} style={{ width: "100%" }}>
                      {transcripts.map((item) => (
                        <div key={item.id} style={{ padding: 16, borderRadius: 16, background: "#fff", boxShadow: "0 6px 18px rgba(15,23,42,0.05)", display: "grid", gridTemplateColumns: "46px 1fr", gap: 14 }}>
                          <div style={{ width: 46, height: 46, borderRadius: "50%", background: "#e6f4ff", color: "#1677ff", display: "flex", alignItems: "center", justifyContent: "center", fontWeight: 700, flexShrink: 0 }}>
                            {item.speakerName.slice(0, 1).toUpperCase()}
                          </div>
                          <div>
                            <Space wrap size={[8, 8]} style={{ marginBottom: 6 }}>
                              <Text strong>{item.speakerName}</Text>
                              {item.userId ? <Tag color="blue">UID: {item.userId}</Tag> : null}
                              <Tag icon={<ClockCircleOutlined />}>{formatTranscriptTime(item.startTime)} - {formatTranscriptTime(item.endTime)}</Tag>
                            </Space>
                            <div style={{ color: "#1f2937", lineHeight: 1.8 }}>{item.text}</div>
                          </div>
                        </div>
                      ))}
                      {streamingText ? (
                        <div style={{ padding: 16, borderRadius: 16, background: "linear-gradient(135deg, rgba(230,244,255,0.9), rgba(245,250,255,0.96))", border: "1px solid #b7d8ff", display: "grid", gridTemplateColumns: "46px 1fr", gap: 14 }}>
                          <div style={{ width: 46, height: 46, borderRadius: "50%", background: "#1677ff", color: "#fff", display: "flex", alignItems: "center", justifyContent: "center", fontWeight: 700 }}>
                            {streamingSpeaker.slice(0, 1).toUpperCase()}
                          </div>
                          <div>
                            <Space wrap size={[8, 8]} style={{ marginBottom: 6 }}>
                              <Text strong>{streamingSpeaker}</Text>
                              <Tag color="processing">稿</Tag>
                            </Space>
                            <div style={{ color: "#334155", lineHeight: 1.8 }}>{streamingText}</div>
                          </div>
                        </div>
                      ) : null}
                    </Space>
                  )}
                </div>
              </Card>
            </Col>
          </Row>
        )}
      </div>
    </div>
  );
}