-
- ASR{selectedAsrModel?.modelName || "-"}
- LLM{selectedSummaryModel?.modelName || "-"}
- WebSocket{resolveWsUrl(selectedAsrModel) || "-"}
+
+
+ 本次识别摘要
+
+
+
+
+
+ ASR
+ {selectedAsrModel?.modelName || "-"}
+
+
+ LLM
+ {selectedSummaryModel?.modelName || "-"}
+
+
+ WebSocket
+
+ {buildRealtimeProxyPreviewUrl()}
+
+
+
+
+
+
+
+ 创建成功后会直接进入识别页,不会在当前页占用麦克风。
+
+
+ } loading={submitting} onClick={() => void handleCreate()}>
+ 创建并进入识别
+
-
-
-
- 创建成功后会直接进入识别页,不会在当前页面占用麦克风。
-
-
- } loading={submitting} onClick={() => void handleCreate()}>
- 创建并进入识别
-
-
-
-
-
+
+
diff --git a/frontend/src/pages/business/RealtimeAsrSession.tsx b/frontend/src/pages/business/RealtimeAsrSession.tsx
index 3779a5e..973db7b 100644
--- a/frontend/src/pages/business/RealtimeAsrSession.tsx
+++ b/frontend/src/pages/business/RealtimeAsrSession.tsx
@@ -1,6 +1,7 @@
-import { useEffect, useMemo, useRef, useState } from "react";
+import { useEffect, useMemo, useRef, useState } from "react";
import {
Alert,
+ Avatar,
Badge,
Button,
Card,
@@ -20,6 +21,7 @@ import {
PlayCircleOutlined,
SoundOutlined,
SyncOutlined,
+ UserOutlined,
} from "@ant-design/icons";
import { useNavigate, useParams } from "react-router-dom";
import dayjs from "dayjs";
@@ -29,10 +31,11 @@ import {
completeRealtimeMeeting,
getMeetingDetail,
getTranscripts,
- uploadAudio,
+ openRealtimeMeetingSocketSession,
type MeetingTranscriptVO,
type MeetingVO,
type RealtimeTranscriptItemDTO,
+ type RealtimeSocketSessionVO,
} from "../../api/business/meeting";
const { Text, Title } = Typography;
@@ -41,6 +44,18 @@ const CHUNK_SIZE = 1280;
type WsSpeaker = string | { name?: string; user_id?: string | number } | undefined;
type WsMessage = {
+ type?: string;
+ code?: number;
+ message?: string;
+ data?: {
+ text?: string;
+ is_final?: boolean;
+ start?: number;
+ end?: number;
+ speaker_id?: string;
+ speaker_name?: string;
+ user_id?: string | number | null;
+ };
text?: string;
is_final?: boolean;
speaker?: WsSpeaker;
@@ -62,9 +77,14 @@ type RealtimeMeetingSessionDraft = {
meetingTitle: string;
asrModelName: string;
summaryModelName: string;
- wsUrl: string;
+ asrModelId: number;
mode: string;
+ language: string;
useSpkId: number;
+ enablePunctuation: boolean;
+ enableItn: boolean;
+ enableTextRefine: boolean;
+ saveAudio: boolean;
hotwords: Array<{ hotword: string; weight: number }>;
};
@@ -72,40 +92,6 @@ function getSessionKey(meetingId: number) {
return `realtimeMeetingSession:${meetingId}`;
}
-function buildWavBlob(samples: number[], sampleRate: number) {
- const pcmBuffer = new ArrayBuffer(samples.length * 2);
- const pcmView = new DataView(pcmBuffer);
- for (let i = 0; i < samples.length; i += 1) {
- const value = Math.max(-1, Math.min(1, samples[i]));
- pcmView.setInt16(i * 2, value < 0 ? value * 0x8000 : value * 0x7fff, true);
- }
-
- const wavBuffer = new ArrayBuffer(44 + pcmBuffer.byteLength);
- const wavView = new DataView(wavBuffer);
- const writeString = (offset: number, text: string) => {
- for (let i = 0; i < text.length; i += 1) {
- wavView.setUint8(offset + i, text.charCodeAt(i));
- }
- };
-
- writeString(0, "RIFF");
- wavView.setUint32(4, 36 + pcmBuffer.byteLength, true);
- writeString(8, "WAVE");
- writeString(12, "fmt ");
- wavView.setUint32(16, 16, true);
- wavView.setUint16(20, 1, true);
- wavView.setUint16(22, 1, true);
- wavView.setUint32(24, sampleRate, true);
- wavView.setUint32(28, sampleRate * 2, true);
- wavView.setUint16(32, 2, true);
- wavView.setUint16(34, 16, true);
- writeString(36, "data");
- wavView.setUint32(40, pcmBuffer.byteLength, true);
- new Uint8Array(wavBuffer, 44).set(new Uint8Array(pcmBuffer));
-
- return new Blob([wavBuffer], { type: "audio/wav" });
-}
-
function floatTo16BitPCM(input: Float32Array) {
const buffer = new ArrayBuffer(input.length * 2);
const view = new DataView(buffer);
@@ -150,6 +136,46 @@ function formatTranscriptTime(ms?: number) {
return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
}
+function toMs(value?: number) { // Scales a number by 1000 and rounds it; presumably seconds -> milliseconds (input unit is not stated here — confirm against the server protocol).
+ if (value === undefined || value === null || Number.isNaN(value)) { // a missing or NaN input yields no time value at all
+ return undefined;
+ }
+ return Math.round(value * 1000);
+}
+
+function buildRealtimeProxyWsUrl(socketSession: RealtimeSocketSessionVO) { // Builds the WebSocket URL on the page's own host from the session's path and sessionToken.
+ const protocol = window.location.protocol === "https:" ? "wss" : "ws"; // "wss" when the page was loaded over https:, otherwise "ws"
+ return `${protocol}://${window.location.host}${socketSession.path}?sessionToken=${encodeURIComponent(socketSession.sessionToken)}`; // token is URL-encoded; NOTE(review): assumes path carries no query string of its own — confirm
+}
+
+function normalizeWsMessage(payload: WsMessage) { // Maps either message shape (typed `data` envelope or flat fields) to { text, isFinal, speaker, startTime, endTime }; returns null for a flat message without text.
+ if (payload.type === "partial" || payload.type === "segment") { // typed shape: fields are read from payload.data
+ const data = payload.data || {}; // a missing data object is treated as empty
+ return {
+ text: data.text || "", // NOTE(review): empty text still yields a non-null result in this branch, unlike the flat branch below
+ isFinal: payload.type === "segment" || !!data.is_final, // a "segment" is always final; a "partial" is final only if data.is_final is truthy
+ speaker: {
+ name: data.speaker_name,
+ user_id: data.user_id ?? data.speaker_id, // speaker_id is used when user_id is null or undefined
+ } as WsSpeaker,
+ startTime: toMs(data.start), // start/end are passed through toMs (x1000, rounded)
+ endTime: toMs(data.end),
+ };
+ }
+
+ if (!payload.text) { // flat shape with no text: nothing to show
+ return null;
+ }
+
+ return {
+ text: payload.text,
+ isFinal: !!payload.is_final,
+ speaker: payload.speaker,
+ startTime: payload.timestamp?.[0]?.[0], // first element of the first timestamp entry, used as-is (no toMs conversion)
+ endTime: payload.timestamp?.[payload.timestamp.length - 1]?.[1], // second element of the last timestamp entry, used as-is
+ };
+}
+
export default function RealtimeAsrSession() {
const navigate = useNavigate();
const { id } = useParams<{ id: string }>();
@@ -175,7 +201,6 @@ export default function RealtimeAsrSession() {
const audioSourceRef = useRef
(null);
const streamRef = useRef(null);
const audioBufferRef = useRef([]);
- const recordedSamplesRef = useRef([]);
const completeOnceRef = useRef(false);
const startedAtRef = useRef(null);
@@ -278,13 +303,14 @@ export default function RealtimeAsrSession() {
audioSourceRef.current = null;
audioContextRef.current = null;
audioBufferRef.current = [];
- const recordedSamples = recordedSamplesRef.current;
- recordedSamplesRef.current = [];
setAudioLevel(0);
- return recordedSamples.length > 0 ? buildWavBlob(recordedSamples, SAMPLE_RATE) : null;
};
const startAudioPipeline = async () => {
+ if (!window.isSecureContext || !navigator.mediaDevices?.getUserMedia) {
+ throw new Error("当前浏览器环境不支持麦克风访问。请使用 localhost 或 HTTPS 域名访问系统。");
+ }
+
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
channelCount: 1,
@@ -300,7 +326,6 @@ export default function RealtimeAsrSession() {
audioContextRef.current = audioContext;
audioSourceRef.current = source;
processorRef.current = processor;
- recordedSamplesRef.current = [];
processor.onaudioprocess = (event) => {
const input = event.inputBuffer.getChannelData(0);
@@ -311,7 +336,6 @@ export default function RealtimeAsrSession() {
maxAmplitude = amplitude;
}
audioBufferRef.current.push(input[i]);
- recordedSamplesRef.current.push(input[i]);
}
setAudioLevel(Math.min(100, Math.round(maxAmplitude * 180)));
@@ -329,23 +353,28 @@ export default function RealtimeAsrSession() {
processor.connect(audioContext.destination);
};
- const saveFinalTranscript = async (msg: WsMessage) => {
- if (!msg.text || !meetingId) {
+ const saveFinalTranscript = async (normalized: { // Sends one transcript item for the current meeting through appendRealtimeTranscripts.
+ text: string; // stored as the item's content
+ speaker?: WsSpeaker; // passed to resolveSpeaker to obtain speakerId / speakerName
+ startTime?: number; // forwarded unchanged as the item's startTime
+ endTime?: number; // forwarded unchanged as the item's endTime
+ }) => {
+ if (!normalized.text || !meetingId) { // nothing is sent when the text is empty or meetingId is falsy
return;
}
- const speaker = resolveSpeaker(msg.speaker);
+ const speaker = resolveSpeaker(normalized.speaker);
const item: RealtimeTranscriptItemDTO = {
speakerId: speaker.speakerId,
speakerName: speaker.speakerName,
- content: msg.text,
- startTime: msg.timestamp?.[0]?.[0],
- endTime: msg.timestamp?.[msg.timestamp.length - 1]?.[1],
+ content: normalized.text,
+ startTime: normalized.startTime,
+ endTime: normalized.endTime,
};
await appendRealtimeTranscripts(meetingId, [item]);
};
const handleStart = async () => {
- if (!sessionDraft?.wsUrl) {
+ if (!sessionDraft?.asrModelId) {
message.error("未找到实时识别配置,请返回创建页重新进入");
return;
}
@@ -356,22 +385,24 @@ export default function RealtimeAsrSession() {
setConnecting(true);
setStatusText("连接识别服务...");
try {
- const socket = new WebSocket(sessionDraft.wsUrl);
+ const socketSessionRes = await openRealtimeMeetingSocketSession(meetingId, {
+ asrModelId: sessionDraft.asrModelId,
+ mode: sessionDraft.mode || "2pass",
+ language: sessionDraft.language || "auto",
+ useSpkId: sessionDraft.useSpkId,
+ enablePunctuation: sessionDraft.enablePunctuation !== false,
+ enableItn: sessionDraft.enableItn !== false,
+ enableTextRefine: !!sessionDraft.enableTextRefine,
+ saveAudio: !!sessionDraft.saveAudio,
+ hotwords: sessionDraft.hotwords || [],
+ });
+ const socketSession = socketSessionRes.data.data;
+ const socket = new WebSocket(buildRealtimeProxyWsUrl(socketSession));
socket.binaryType = "arraybuffer";
wsRef.current = socket;
socket.onopen = async () => {
- socket.send(JSON.stringify({
- mode: sessionDraft.mode || "2pass",
- chunk_size: [0, 8, 4],
- chunk_interval: 4,
- wav_name: `meeting_${meetingId}`,
- is_speaking: true,
- speaker_name: null,
- use_spk_id: sessionDraft.useSpkId,
- save_audio: false,
- hotwords: sessionDraft.hotwords,
- }));
+ socket.send(JSON.stringify(socketSession.startMessage || {}));
await startAudioPipeline();
startedAtRef.current = Date.now();
setConnecting(false);
@@ -382,29 +413,36 @@ export default function RealtimeAsrSession() {
socket.onmessage = (event) => {
try {
const payload = JSON.parse(event.data) as WsMessage;
- if (!payload.text) {
+ if (payload.code && payload.message) {
+ setStatusText(payload.message);
+ message.error(payload.message);
return;
}
- const speaker = resolveSpeaker(payload.speaker);
- if (payload.is_final) {
+ const normalized = normalizeWsMessage(payload);
+ if (!normalized) {
+ return;
+ }
+
+ const speaker = resolveSpeaker(normalized.speaker);
+ if (normalized.isFinal) {
setTranscripts((prev) => [
...prev,
{
id: `${Date.now()}-${Math.random()}`,
speakerName: speaker.speakerName,
userId: speaker.userId,
- text: payload.text,
- startTime: payload.timestamp?.[0]?.[0],
- endTime: payload.timestamp?.[payload.timestamp.length - 1]?.[1],
+ text: normalized.text,
+ startTime: normalized.startTime,
+ endTime: normalized.endTime,
final: true,
},
]);
setStreamingText("");
setStreamingSpeaker("Unknown");
- void saveFinalTranscript(payload);
+ void saveFinalTranscript(normalized);
} else {
- setStreamingText(payload.text);
+ setStreamingText(normalized.text);
setStreamingSpeaker(speaker.speakerName);
}
} catch {
@@ -423,10 +461,10 @@ export default function RealtimeAsrSession() {
setConnecting(false);
setRecording(false);
};
- } catch {
+ } catch (error) {
setConnecting(false);
setStatusText("启动失败");
- message.error("启动实时识别失败");
+ message.error(error instanceof Error ? error.message : "启动实时识别失败");
}
};
@@ -445,20 +483,10 @@ export default function RealtimeAsrSession() {
wsRef.current?.close();
wsRef.current = null;
- const audioBlob = await shutdownAudioPipeline();
- let uploadedAudioUrl: string | undefined;
- if (audioBlob) {
- try {
- const file = new File([audioBlob], `meeting-${meetingId}.wav`, { type: audioBlob.type || "audio/wav" });
- const uploadRes = await uploadAudio(file);
- uploadedAudioUrl = uploadRes.data.data;
- } catch {
- message.warning("会议音频上传失败,已保留转录内容");
- }
- }
+ await shutdownAudioPipeline();
try {
- await completeRealtimeMeeting(meetingId, uploadedAudioUrl ? { audioUrl: uploadedAudioUrl } : {});
+ await completeRealtimeMeeting(meetingId, {});
sessionStorage.removeItem(getSessionKey(meetingId));
setStatusText("已提交总结任务");
message.success("实时会议已结束,正在生成总结");
@@ -499,6 +527,80 @@ export default function RealtimeAsrSession() {
return (
+
navigate("/meeting-live-create")}>返回创建页}
/>
@@ -529,7 +631,7 @@ export default function RealtimeAsrSession() {
LIVE SESSION
会中实时识别
- 会中页只保留控制区和实时转写流。
+ 会中页面只保留控制区和实时转写流。
@@ -586,37 +688,35 @@ export default function RealtimeAsrSession() {
{transcripts.length === 0 && !streamingText ? (
-
+
) : (
{transcripts.map((item) => (
-
-
- {item.speakerName.slice(0, 1).toUpperCase()}
-
-
-
- {item.speakerName}
+
+
{formatTranscriptTime(item.startTime)}
+
+
+
} className="transcript-avatar" />
+
{item.speakerName}
{item.userId ?
UID: {item.userId} : null}
-
}>{formatTranscriptTime(item.startTime)} - {formatTranscriptTime(item.endTime)}
-
-
{item.text}
+
{formatTranscriptTime(item.startTime)} - {formatTranscriptTime(item.endTime)}
+
+
{item.text}
))}
{streamingText ? (
-
-
- {streamingSpeaker.slice(0, 1).toUpperCase()}
-
-
-
- {streamingSpeaker}
+
+
--:--
+
+
+
} className="transcript-avatar" />
+
{streamingSpeaker}
流式草稿
-
-
{streamingText}
+
+
{streamingText}
) : null}
@@ -631,3 +731,4 @@ export default function RealtimeAsrSession() {
);
}
+
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index e7f0c4a..93599d9 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -16,7 +16,11 @@ export default defineConfig({
proxy: {
"/auth": "http://localhost:8081",
"/sys": "http://localhost:8081",
- "/api": "http://localhost:8081"
+ "/api": "http://localhost:8081",
+ "/ws": {
+ target: "ws://localhost:8081",
+ ws: true
+ }
}
}
});