From 9d1a8710af2edc349a351a76eb7c038ea6e4b2e2 Mon Sep 17 00:00:00 2001 From: chenhao Date: Mon, 30 Mar 2026 17:56:30 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=E5=AE=9E=E6=97=B6?= =?UTF-8?q?=E4=BC=9A=E8=AE=AE=E9=85=8D=E7=BD=AE=E9=80=89=E9=A1=B9=E5=92=8C?= =?UTF-8?q?WebSocket=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 `RealtimeAsr` 组件中添加语言、标点、ITN、文本修正和音频保存等配置选项 - 添加构建WebSocket URL的函数 `buildRealtimeProxyPreviewUrl` - 更新 `meeting.ts` API,增加 `openRealtimeMeetingSocketSession` 接口 - 更新 `vite.config.ts`,添加WebSocket代理配置 - 优化 `RealtimeAsrSession` 组件,处理WebSocket消息并支持新的配置选项 --- backend/pom.xml | 4 + .../java/com/imeeting/common/RedisKeys.java | 4 + .../controller/biz/MeetingController.java | 26 ++ .../service/biz/impl/AiTaskServiceImpl.java | 134 +++--- .../biz/impl/MeetingCommandServiceImpl.java | 29 +- frontend/src/api/business/meeting.ts | 29 ++ frontend/src/pages/business/RealtimeAsr.tsx | 414 ++++++++++++------ .../src/pages/business/RealtimeAsrSession.tsx | 313 ++++++++----- frontend/vite.config.ts | 6 +- 9 files changed, 659 insertions(+), 300 deletions(-) diff --git a/backend/pom.xml b/backend/pom.xml index 8c2ebaf..1a3188c 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -27,6 +27,10 @@ org.springframework.boot spring-boot-starter-web + + org.springframework.boot + spring-boot-starter-websocket + org.springframework.boot spring-boot-starter-security diff --git a/backend/src/main/java/com/imeeting/common/RedisKeys.java b/backend/src/main/java/com/imeeting/common/RedisKeys.java index a0a2d09..e2b60d3 100644 --- a/backend/src/main/java/com/imeeting/common/RedisKeys.java +++ b/backend/src/main/java/com/imeeting/common/RedisKeys.java @@ -43,6 +43,10 @@ public final class RedisKeys { return "biz:meeting:polling:lock:" + meetingId; } + public static String realtimeMeetingSocketSessionKey(String sessionToken) { + return "biz:meeting:realtime:socket:" + sessionToken; + } + public static final String CACHE_EMPTY_MARKER = "EMPTY_MARKER"; public static final String SYS_PARAM_FIELD_VALUE = "value"; public static final String SYS_PARAM_FIELD_TYPE = "type"; diff --git a/backend/src/main/java/com/imeeting/controller/biz/MeetingController.java b/backend/src/main/java/com/imeeting/controller/biz/MeetingController.java index 9023843..1f77697 100644 --- a/backend/src/main/java/com/imeeting/controller/biz/MeetingController.java +++ b/backend/src/main/java/com/imeeting/controller/biz/MeetingController.java @@ -8,7 +8,9 @@ import com.imeeting.dto.biz.MeetingSpeakerUpdateDTO; import com.imeeting.dto.biz.MeetingSummaryExportResult; import com.imeeting.dto.biz.MeetingTranscriptVO; import com.imeeting.dto.biz.MeetingVO; +import com.imeeting.dto.biz.OpenRealtimeSocketSessionCommand; import com.imeeting.dto.biz.RealtimeMeetingCompleteDTO; +import com.imeeting.dto.biz.RealtimeSocketSessionVO; import com.imeeting.dto.biz.RealtimeTranscriptItemDTO; import com.imeeting.dto.biz.UpdateMeetingBasicCommand; import com.imeeting.dto.biz.UpdateMeetingParticipantsCommand; @@ -20,6 +22,7 @@ import com.imeeting.service.biz.MeetingCommandService; import com.imeeting.service.biz.MeetingExportService; import com.imeeting.service.biz.MeetingQueryService; import com.imeeting.service.biz.PromptTemplateService; +import com.imeeting.service.biz.RealtimeMeetingSocketSessionService; import com.unisbase.common.ApiResponse; import com.unisbase.dto.PageResult; import com.unisbase.security.LoginUser; @@ -59,6 +62,7 @@ public class MeetingController { private final MeetingAccessService meetingAccessService; private final MeetingExportService meetingExportService; private final PromptTemplateService promptTemplateService; + private final RealtimeMeetingSocketSessionService realtimeMeetingSocketSessionService; private final StringRedisTemplate redisTemplate; private final String uploadPath; private final String resourcePrefix; @@ -68,6 +72,7 @@ public class MeetingController { MeetingAccessService meetingAccessService, MeetingExportService meetingExportService, PromptTemplateService promptTemplateService, + RealtimeMeetingSocketSessionService realtimeMeetingSocketSessionService, StringRedisTemplate redisTemplate, @Value("${unisbase.app.upload-path}") String uploadPath, @Value("${unisbase.app.resource-prefix}") String resourcePrefix) { @@ -76,6 +81,7 @@ public class MeetingController { this.meetingAccessService = meetingAccessService; this.meetingExportService = meetingExportService; this.promptTemplateService = promptTemplateService; + this.realtimeMeetingSocketSessionService = realtimeMeetingSocketSessionService; this.redisTemplate = redisTemplate; this.uploadPath = uploadPath; this.resourcePrefix = resourcePrefix; @@ -225,6 +231,26 @@ public class MeetingController { return ApiResponse.ok(true); } + @PostMapping("/{id}/realtime/socket-session") + @PreAuthorize("isAuthenticated()") + public ApiResponse openRealtimeSocketSession(@PathVariable Long id, + @RequestBody OpenRealtimeSocketSessionCommand command) { + LoginUser loginUser = currentLoginUser(); + return ApiResponse.ok(realtimeMeetingSocketSessionService.createSession( + id, + command.getAsrModelId(), + command.getMode(), + command.getLanguage(), + command.getUseSpkId(), + command.getEnablePunctuation(), + command.getEnableItn(), + command.getEnableTextRefine(), + command.getSaveAudio(), + command.getHotwords(), + loginUser + )); + } + @PostMapping("/{id}/realtime/complete") @PreAuthorize("isAuthenticated()") public ApiResponse completeRealtimeMeeting(@PathVariable Long id, @RequestBody(required = false) RealtimeMeetingCompleteDTO dto) { diff --git a/backend/src/main/java/com/imeeting/service/biz/impl/AiTaskServiceImpl.java b/backend/src/main/java/com/imeeting/service/biz/impl/AiTaskServiceImpl.java index ad26cc1..023dd0e 100644 --- a/backend/src/main/java/com/imeeting/service/biz/impl/AiTaskServiceImpl.java +++ b/backend/src/main/java/com/imeeting/service/biz/impl/AiTaskServiceImpl.java @@ -131,7 +131,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme List transcripts = transcriptMapper.selectList(new LambdaQueryWrapper() .eq(MeetingTranscript::getMeetingId, meetingId) .orderByAsc(MeetingTranscript::getStartTime)); - + if (transcripts.isEmpty()) { throw new RuntimeException("没有找到可用的转录文本,无法生成总结"); } @@ -157,11 +157,11 @@ public class AiTaskServiceImpl extends ServiceImpl impleme private String processAsrTask(Meeting meeting, AiTask taskRecord) throws Exception { updateMeetingStatus(meeting.getId(), 1); - + taskRecord.setStatus(1); taskRecord.setStartedAt(LocalDateTime.now()); this.updateById(taskRecord); - + Long asrModelId = Long.valueOf(taskRecord.getTaskConfig().get("asrModelId").toString()); AiModelVO asrModel = aiModelService.getModelById(asrModelId, "ASR"); if (asrModel == null) throw new RuntimeException("ASR模型配置不存在"); @@ -173,7 +173,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme Map req = buildAsrRequest(meeting, taskRecord, asrModel); taskRecord.setRequestData(req); this.updateById(taskRecord); - + String respBody = postJson(submitUrl, req, asrModel.getApiKey()); JsonNode submitNode = objectMapper.readTree(respBody); if (submitNode.path("code").asInt() != 0) { @@ -185,7 +185,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme this.updateById(taskRecord); String queryUrl = appendPath(asrModel.getBaseUrl(), "api/v1/asr/transcriptions/" + taskId); - + // 轮询逻辑 (带防卡死防护) JsonNode resultNode = null; int lastPercent = -1; @@ -208,7 +208,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme } else { int currentPercent = data.path("percentage").asInt(); int eta = data.path("eta_seconds").asInt(statusNode.path("eta_seconds").asInt(data.path("eta").asInt(0))); - updateProgress(meeting.getId(), (int)(currentPercent * 0.85), data.path("message").asText(), eta); + updateProgress(meeting.getId(), (int) (currentPercent * 0.85), data.path("message").asText(), eta); if (currentPercent > 0 && currentPercent == lastPercent) { if (++unchangedCount > 300) throw new RuntimeException("识别任务长时间无进度增长,自动强制超时"); @@ -230,8 +230,11 @@ public class AiTaskServiceImpl extends ServiceImpl impleme String rawAudioUrl = meeting.getAudioUrl(); String encodedAudioUrl = Arrays.stream(rawAudioUrl.split("/")) .map(part -> { - try { return URLEncoder.encode(part, StandardCharsets.UTF_8).replace("+", "%20"); } - catch (Exception e) { return part; } + try { + return URLEncoder.encode(part, StandardCharsets.UTF_8).replace("+", "%20"); + } catch (Exception e) { + return part; + } }) .collect(Collectors.joining("/")); req.put("file_url", serverBaseUrl + (encodedAudioUrl.startsWith("/") ? "" : "/") + encodedAudioUrl); @@ -240,13 +243,12 @@ public class AiTaskServiceImpl extends ServiceImpl impleme if (asrModel.getModelCode() != null && !asrModel.getModelCode().isBlank()) { config.put("model", asrModel.getModelCode()); } - + Object useSpkObj = taskRecord.getTaskConfig().get("useSpkId"); boolean useSpk = useSpkObj != null && useSpkObj.toString().equals("1"); config.put("enable_speaker", useSpk); config.put("enable_two_pass", true); - List> hotwords = new ArrayList<>(); Object hotWordsObj = taskRecord.getTaskConfig().get("hotWords"); if (hotWordsObj instanceof List) { @@ -254,7 +256,8 @@ public class AiTaskServiceImpl extends ServiceImpl impleme if (!words.isEmpty()) { List entities = hotWordService.list(new LambdaQueryWrapper() .eq(HotWord::getTenantId, meeting.getTenantId()).in(HotWord::getWord, words)); - Map weightMap = entities.stream().collect(Collectors.toMap(HotWord::getWord, HotWord::getWeight, (v1, v2) -> v1)); + Map weightMap = entities.stream() + .collect(Collectors.toMap(HotWord::getWord, HotWord::getWeight, (v1, v2) -> v1)); for (String w : words) { hotwords.add(Map.of("hotword", w, "weight", weightMap.getOrDefault(w, 10) / 10.0)); } @@ -269,7 +272,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme protected String saveTranscripts(Meeting meeting, JsonNode resultNode) { // 关键:入库前清理旧记录,防止恢复任务导致数据重复 transcriptMapper.delete(new LambdaQueryWrapper().eq(MeetingTranscript::getMeetingId, meeting.getId())); - + StringBuilder sb = new StringBuilder(); JsonNode segments = resultNode.path("segments"); if (segments.isArray()) { @@ -277,7 +280,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme for (JsonNode seg : segments) { MeetingTranscript mt = new MeetingTranscript(); mt.setMeetingId(meeting.getId()); - + String spkId = extractSpeakerId(seg); String spkName = resolveTranscriptSpeakerName(seg, spkId); @@ -390,24 +393,24 @@ public class AiTaskServiceImpl extends ServiceImpl impleme private void processSummaryTask(Meeting meeting, String asrText, AiTask taskRecord) throws Exception { updateMeetingStatus(meeting.getId(), 2); updateProgress(meeting.getId(), 90, "正在生成智能总结纪要...", 0); - + taskRecord.setStatus(1); taskRecord.setStartedAt(LocalDateTime.now()); this.updateById(taskRecord); - + Long summaryModelId = Long.valueOf(taskRecord.getTaskConfig().get("summaryModelId").toString()); AiModelVO llmModel = aiModelService.getModelById(summaryModelId, "LLM"); if (llmModel == null) return; - - String promptContent = taskRecord.getTaskConfig().get("promptContent") != null ? - taskRecord.getTaskConfig().get("promptContent").toString() : ""; + + String promptContent = taskRecord.getTaskConfig().get("promptContent") != null + ? taskRecord.getTaskConfig().get("promptContent").toString() : ""; Map req = new HashMap<>(); req.put("model", llmModel.getModelCode()); req.put("temperature", llmModel.getTemperature()); req.put("messages", List.of( - Map.of("role", "system", "content", buildSummarySystemPrompt(promptContent)), - Map.of("role", "user", "content", buildSummaryUserPrompt(meeting, asrText)) + Map.of("role", "system", "content", buildSummarySystemPrompt(promptContent)), + Map.of("role", "user", "content", buildSummaryUserPrompt(meeting, asrText)) )); taskRecord.setRequestData(req); @@ -416,7 +419,7 @@ public class AiTaskServiceImpl extends ServiceImpl impleme String url = llmModel.getBaseUrl() + (llmModel.getApiPath() != null ? llmModel.getApiPath() : "/v1/chat/completions"); String requestBody = objectMapper.writeValueAsString(req); log.info("Sending LLM summary request to url={}, body={}", url, requestBody); - + HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(url)) .header("Content-Type", "application/json; charset=UTF-8") @@ -432,18 +435,22 @@ public class AiTaskServiceImpl extends ServiceImpl impleme if (response.statusCode() == 200 && respNode.has("choices")) { String content = sanitizeSummaryContent(respNode.path("choices").path(0).path("message").path("content").asText()); Map summaryBundle = meetingSummaryFileService.parseSummaryBundle(content); - String markdownContent = summaryBundle != null - ? String.valueOf(summaryBundle.getOrDefault("summaryContent", "")) - : content; - if (markdownContent == null || markdownContent.isBlank()) { - markdownContent = content; - } @SuppressWarnings("unchecked") Map normalizedAnalysis = summaryBundle != null ? (Map) summaryBundle.get("analysis") : meetingSummaryFileService.parseSummaryAnalysis(content); - - // Save to File + + String markdownContent = summaryBundle != null + ? String.valueOf(summaryBundle.getOrDefault("summaryContent", "")) + : ""; + if ((markdownContent == null || markdownContent.isBlank()) && normalizedAnalysis != null && !normalizedAnalysis.isEmpty()) { + markdownContent = meetingSummaryFileService.buildSummaryMarkdown(normalizedAnalysis); + } + if (markdownContent == null || markdownContent.isBlank()) { + updateAiTaskFail(taskRecord, "LLM summary content parse failed: " + content); + throw new RuntimeException("AI总结结果解析失败,未生成可保存的会议纪要"); + } + String timestamp = java.time.format.DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss").format(LocalDateTime.now()); String fileName = "summary_" + timestamp + ".md"; String basePath = uploadPath.endsWith("/") ? uploadPath : uploadPath + "/"; @@ -451,29 +458,25 @@ public class AiTaskServiceImpl extends ServiceImpl impleme Files.createDirectories(targetDir); Path filePath = targetDir.resolve(fileName); - Files.writeString(filePath, markdownContent, StandardCharsets.UTF_8); - + Files.writeString(filePath, markdownContent, StandardCharsets.UTF_8); + taskRecord.setResultFilePath("meetings/" + meeting.getId() + "/summaries/" + fileName); - if (summaryBundle != null || normalizedAnalysis != null) { - Map responseData = objectMapper.convertValue(respNode, Map.class); - if (summaryBundle != null) { - responseData.put("summaryBundle", summaryBundle); - } - if (normalizedAnalysis != null) { - responseData.put("normalizedAnalysis", normalizedAnalysis); - } - taskRecord.setResponseData(responseData); - taskRecord.setStatus(2); - taskRecord.setCompletedAt(LocalDateTime.now()); - this.updateById(taskRecord); - } else { - updateAiTaskSuccess(taskRecord, respNode); + Map responseData = objectMapper.convertValue(respNode, Map.class); + if (summaryBundle != null) { + responseData.put("summaryBundle", summaryBundle); } - + if (normalizedAnalysis != null) { + responseData.put("normalizedAnalysis", normalizedAnalysis); + } + taskRecord.setResponseData(responseData); + taskRecord.setStatus(2); + taskRecord.setCompletedAt(LocalDateTime.now()); + this.updateById(taskRecord); + meeting.setLatestSummaryTaskId(taskRecord.getId()); - meeting.setStatus(3); + meeting.setStatus(3); meetingMapper.updateById(meeting); - + updateProgress(meeting.getId(), 100, "全流程分析完成", 0); } else { updateAiTaskFail(taskRecord, "LLM Summary failed: " + response.body()); @@ -488,8 +491,12 @@ public class AiTaskServiceImpl extends ServiceImpl impleme progress.put("message", msg); progress.put("eta", eta); progress.put("updateAt", System.currentTimeMillis()); - redisTemplate.opsForValue().set(RedisKeys.meetingProgressKey(meetingId), - objectMapper.writeValueAsString(progress), 1, TimeUnit.HOURS); + redisTemplate.opsForValue().set( + RedisKeys.meetingProgressKey(meetingId), + objectMapper.writeValueAsString(progress), + 1, + TimeUnit.HOURS + ); } catch (Exception e) { log.error("Redis progress update error", e); } @@ -633,23 +640,34 @@ public class AiTaskServiceImpl extends ServiceImpl impleme } private void updateMeetingStatus(Long id, int status) { - Meeting m = new Meeting(); m.setId(id); m.setStatus(status); meetingMapper.updateById(m); + Meeting m = new Meeting(); + m.setId(id); + m.setStatus(status); + meetingMapper.updateById(m); } private AiTask createAiTask(Long meetingId, String type, Map req) { AiTask task = new AiTask(); - task.setMeetingId(meetingId); task.setTaskType(type); task.setStatus(1); - task.setRequestData(req); task.setStartedAt(LocalDateTime.now()); - this.save(task); return task; + task.setMeetingId(meetingId); + task.setTaskType(type); + task.setStatus(1); + task.setRequestData(req); + task.setStartedAt(LocalDateTime.now()); + this.save(task); + return task; } private void updateAiTaskSuccess(AiTask task, JsonNode resp) { - task.setStatus(2); task.setResponseData(objectMapper.convertValue(resp, Map.class)); - task.setCompletedAt(LocalDateTime.now()); this.updateById(task); + task.setStatus(2); + task.setResponseData(objectMapper.convertValue(resp, Map.class)); + task.setCompletedAt(LocalDateTime.now()); + this.updateById(task); } private void updateAiTaskFail(AiTask task, String error) { - task.setStatus(3); task.setErrorMsg(error); - task.setCompletedAt(LocalDateTime.now()); this.updateById(task); + task.setStatus(3); + task.setErrorMsg(error); + task.setCompletedAt(LocalDateTime.now()); + this.updateById(task); } } diff --git a/backend/src/main/java/com/imeeting/service/biz/impl/MeetingCommandServiceImpl.java b/backend/src/main/java/com/imeeting/service/biz/impl/MeetingCommandServiceImpl.java index 0945dc0..5e26cf0 100644 --- a/backend/src/main/java/com/imeeting/service/biz/impl/MeetingCommandServiceImpl.java +++ b/backend/src/main/java/com/imeeting/service/biz/impl/MeetingCommandServiceImpl.java @@ -2,6 +2,8 @@ package com.imeeting.service.biz.impl; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.imeeting.common.RedisKeys; import com.imeeting.dto.biz.CreateMeetingCommand; import com.imeeting.dto.biz.CreateRealtimeMeetingCommand; import com.imeeting.dto.biz.MeetingVO; @@ -18,12 +20,14 @@ import com.imeeting.service.biz.MeetingCommandService; import com.imeeting.service.biz.MeetingService; import com.imeeting.service.biz.MeetingSummaryFileService; import lombok.RequiredArgsConstructor; +import org.springframework.data.redis.core.StringRedisTemplate; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @Service @@ -36,6 +40,8 @@ public class MeetingCommandServiceImpl implements MeetingCommandService { private final com.imeeting.mapper.biz.MeetingTranscriptMapper transcriptMapper; private final MeetingSummaryFileService meetingSummaryFileService; private final MeetingDomainSupport meetingDomainSupport; + private final StringRedisTemplate redisTemplate; + private final ObjectMapper objectMapper; @Override @Transactional(rollbackFor = Exception.class) @@ -158,9 +164,12 @@ public class MeetingCommandServiceImpl implements MeetingCommandService { if (transcriptCount <= 0) { meeting.setStatus(4); meetingService.updateById(meeting); - throw new RuntimeException("鏈帴鏀跺埌鍙敤鐨勫疄鏃惰浆褰曞唴瀹?"); + throw new RuntimeException("当前会议还没有可用的转录文本,无法生成总结"); } + meeting.setStatus(2); + meetingService.updateById(meeting); + updateMeetingProgress(meetingId, 90, "正在生成智能总结纪要...", 0); aiTaskService.dispatchSummaryTask(meetingId); } @@ -232,4 +241,22 @@ public class MeetingCommandServiceImpl implements MeetingCommandService { meetingService.updateById(meeting); aiTaskService.dispatchSummaryTask(meetingId); } + + private void updateMeetingProgress(Long meetingId, int percent, String message, int eta) { + try { + Map progress = new HashMap<>(); + progress.put("percent", percent); + progress.put("message", message); + progress.put("eta", eta); + progress.put("updateAt", System.currentTimeMillis()); + redisTemplate.opsForValue().set( + RedisKeys.meetingProgressKey(meetingId), + objectMapper.writeValueAsString(progress), + 1, + TimeUnit.HOURS + ); + } catch (Exception ignored) { + // Ignore progress write failures. + } + } } diff --git a/frontend/src/api/business/meeting.ts b/frontend/src/api/business/meeting.ts index f16bfa9..9dc61ed 100644 --- a/frontend/src/api/business/meeting.ts +++ b/frontend/src/api/business/meeting.ts @@ -84,6 +84,25 @@ export interface RealtimeTranscriptItemDTO { endTime?: number; } +export interface RealtimeSocketSessionVO { + sessionToken: string; + path: string; + expiresInSeconds: number; + startMessage: Record; +} + +export interface RealtimeSocketSessionRequest { + asrModelId: number; + mode?: string; + language?: string; + useSpkId?: number; + enablePunctuation?: boolean; + enableItn?: boolean; + enableTextRefine?: boolean; + saveAudio?: boolean; + hotwords?: Array<{ hotword: string; weight: number }>; +} + export const createRealtimeMeeting = (data: CreateMeetingCommand) => { return http.post( "/api/biz/meeting/realtime/start", @@ -98,6 +117,16 @@ export const appendRealtimeTranscripts = (meetingId: number, data: RealtimeTrans ); }; +export const openRealtimeMeetingSocketSession = ( + meetingId: number, + data: RealtimeSocketSessionRequest, +) => { + return http.post( + `/api/biz/meeting/${meetingId}/realtime/socket-session`, + data + ); +}; + export const completeRealtimeMeeting = (meetingId: number, data?: { audioUrl?: string }) => { return http.post( `/api/biz/meeting/${meetingId}/realtime/complete`, diff --git a/frontend/src/pages/business/RealtimeAsr.tsx b/frontend/src/pages/business/RealtimeAsr.tsx index b2a568d..b3ec9dc 100644 --- a/frontend/src/pages/business/RealtimeAsr.tsx +++ b/frontend/src/pages/business/RealtimeAsr.tsx @@ -47,9 +47,14 @@ type RealtimeMeetingSessionDraft = { meetingTitle: string; asrModelName: string; summaryModelName: string; - wsUrl: string; + asrModelId: number; mode: string; + language: string; useSpkId: number; + enablePunctuation: boolean; + enableItn: boolean; + enableTextRefine: boolean; + saveAudio: boolean; hotwords: Array<{ hotword: string; weight: number }>; }; @@ -63,6 +68,11 @@ function resolveWsUrl(model?: AiModelVO | null) { return ""; } +function buildRealtimeProxyPreviewUrl() { + const protocol = window.location.protocol === "https:" ? "wss" : "ws"; + return `${protocol}://${window.location.host}/ws/meeting/realtime`; +} + function getSessionKey(meetingId: number) { return `realtimeMeetingSession:${meetingId}`; } @@ -127,6 +137,11 @@ export default function RealtimeAsr() { promptId: activePrompts[0]?.id, useSpkId: 1, mode: "2pass", + language: "auto", + enablePunctuation: true, + enableItn: true, + enableTextRefine: false, + saveAudio: false, }); } catch { message.error("加载实时会议配置失败"); @@ -172,9 +187,14 @@ export default function RealtimeAsr() { meetingTitle: createdMeeting.title, asrModelName: selectedAsrModel?.modelName || "ASR", summaryModelName: selectedSummaryModel?.modelName || "LLM", - wsUrl, + asrModelId: selectedAsrModel?.id || values.asrModelId, mode: values.mode || "2pass", + language: values.language || "auto", useSpkId: values.useSpkId ? 1 : 0, + enablePunctuation: values.enablePunctuation !== false, + enableItn: values.enableItn !== false, + enableTextRefine: !!values.enableTextRefine, + saveAudio: !!values.saveAudio, hotwords: selectedHotwords, }; @@ -190,10 +210,7 @@ export default function RealtimeAsr() { return (
- +
{loading ? ( @@ -210,15 +227,37 @@ export default function RealtimeAsr() { style={{ height: "100%", borderRadius: 18, boxShadow: "0 8px 22px rgba(15,23,42,0.05)" }} bodyStyle={{ height: "100%", padding: 16, display: "flex", flexDirection: "column" }} > -
+
-
+
- 创建实时会议 - 完成会前配置后再进入会中识别页。 + + 创建实时会议 + + 会前完成配置后再进入会中识别页。
@@ -229,113 +268,184 @@ export default function RealtimeAsr() {
-
+
- - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - + {userList.map((user) => ( + + ))} + + + + + + - {asrModels.map((model) => ( - - ))} - - - - - - - - - + + + + + + + + + + + + - - - - - - - - 热词增强 }> - - - - + + + + + + + + + 热词增强{" "} + + + + + } + > + + + + - - - - - - - - 说话人区分 } - valuePropName="checked" - getValueProps={(value) => ({ checked: value === 1 || value === true })} - normalize={(value) => (value ? 1 : 0)} - > - - - - - - } readOnly /> - - - + + + + + + + + + + + + + + 说话人区分 + + + + + } + valuePropName="checked" + getValueProps={(value) => ({ checked: value === 1 || value === true })} + normalize={(value) => (value ? 1 : 0)} + > + + + + + + } readOnly /> + + + + + + + +
-
@@ -347,34 +457,70 @@ export default function RealtimeAsr() { bodyStyle={{ height: "100%", padding: 16, display: "flex", flexDirection: "column" }} > - } /> - } /> - } /> - } /> + + } /> + + + } /> + + + } + /> + + + } + /> + -
本次识别摘要
-
- -
ASR{selectedAsrModel?.modelName || "-"}
-
LLM{selectedSummaryModel?.modelName || "-"}
-
WebSocket{resolveWsUrl(selectedAsrModel) || "-"}
+
+ + 本次识别摘要 + +
+
+ +
+ ASR + {selectedAsrModel?.modelName || "-"} +
+
+ LLM + {selectedSummaryModel?.modelName || "-"} +
+
+ WebSocket + + {buildRealtimeProxyPreviewUrl()} + +
+
+
+ +
+
+ 创建成功后会直接进入识别页,不会在当前页占用麦克风。 + + +
- -
-
- 创建成功后会直接进入识别页,不会在当前页面占用麦克风。 - - - - -
-
- +
+
diff --git a/frontend/src/pages/business/RealtimeAsrSession.tsx b/frontend/src/pages/business/RealtimeAsrSession.tsx index 3779a5e..973db7b 100644 --- a/frontend/src/pages/business/RealtimeAsrSession.tsx +++ b/frontend/src/pages/business/RealtimeAsrSession.tsx @@ -1,6 +1,7 @@ -import { useEffect, useMemo, useRef, useState } from "react"; +import { useEffect, useMemo, useRef, useState } from "react"; import { Alert, + Avatar, Badge, Button, Card, @@ -20,6 +21,7 @@ import { PlayCircleOutlined, SoundOutlined, SyncOutlined, + UserOutlined, } from "@ant-design/icons"; import { useNavigate, useParams } from "react-router-dom"; import dayjs from "dayjs"; @@ -29,10 +31,11 @@ import { completeRealtimeMeeting, getMeetingDetail, getTranscripts, - uploadAudio, + openRealtimeMeetingSocketSession, type MeetingTranscriptVO, type MeetingVO, type RealtimeTranscriptItemDTO, + type RealtimeSocketSessionVO, } from "../../api/business/meeting"; const { Text, Title } = Typography; @@ -41,6 +44,18 @@ const CHUNK_SIZE = 1280; type WsSpeaker = string | { name?: string; user_id?: string | number } | undefined; type WsMessage = { + type?: string; + code?: number; + message?: string; + data?: { + text?: string; + is_final?: boolean; + start?: number; + end?: number; + speaker_id?: string; + speaker_name?: string; + user_id?: string | number | null; + }; text?: string; is_final?: boolean; speaker?: WsSpeaker; @@ -62,9 +77,14 @@ type RealtimeMeetingSessionDraft = { meetingTitle: string; asrModelName: string; summaryModelName: string; - wsUrl: string; + asrModelId: number; mode: string; + language: string; useSpkId: number; + enablePunctuation: boolean; + enableItn: boolean; + enableTextRefine: boolean; + saveAudio: boolean; hotwords: Array<{ hotword: string; weight: number }>; }; @@ -72,40 +92,6 @@ function getSessionKey(meetingId: number) { return `realtimeMeetingSession:${meetingId}`; } -function buildWavBlob(samples: number[], sampleRate: number) { - const pcmBuffer = new ArrayBuffer(samples.length * 2); - const pcmView = new DataView(pcmBuffer); - for (let i = 0; i < samples.length; i += 1) { - const value = Math.max(-1, Math.min(1, samples[i])); - pcmView.setInt16(i * 2, value < 0 ? value * 0x8000 : value * 0x7fff, true); - } - - const wavBuffer = new ArrayBuffer(44 + pcmBuffer.byteLength); - const wavView = new DataView(wavBuffer); - const writeString = (offset: number, text: string) => { - for (let i = 0; i < text.length; i += 1) { - wavView.setUint8(offset + i, text.charCodeAt(i)); - } - }; - - writeString(0, "RIFF"); - wavView.setUint32(4, 36 + pcmBuffer.byteLength, true); - writeString(8, "WAVE"); - writeString(12, "fmt "); - wavView.setUint32(16, 16, true); - wavView.setUint16(20, 1, true); - wavView.setUint16(22, 1, true); - wavView.setUint32(24, sampleRate, true); - wavView.setUint32(28, sampleRate * 2, true); - wavView.setUint16(32, 2, true); - wavView.setUint16(34, 16, true); - writeString(36, "data"); - wavView.setUint32(40, pcmBuffer.byteLength, true); - new Uint8Array(wavBuffer, 44).set(new Uint8Array(pcmBuffer)); - - return new Blob([wavBuffer], { type: "audio/wav" }); -} - function floatTo16BitPCM(input: Float32Array) { const buffer = new ArrayBuffer(input.length * 2); const view = new DataView(buffer); @@ -150,6 +136,46 @@ function formatTranscriptTime(ms?: number) { return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`; } +function toMs(value?: number) { + if (value === undefined || value === null || Number.isNaN(value)) { + return undefined; + } + return Math.round(value * 1000); +} + +function buildRealtimeProxyWsUrl(socketSession: RealtimeSocketSessionVO) { + const protocol = window.location.protocol === "https:" ? "wss" : "ws"; + return `${protocol}://${window.location.host}${socketSession.path}?sessionToken=${encodeURIComponent(socketSession.sessionToken)}`; +} + +function normalizeWsMessage(payload: WsMessage) { + if (payload.type === "partial" || payload.type === "segment") { + const data = payload.data || {}; + return { + text: data.text || "", + isFinal: payload.type === "segment" || !!data.is_final, + speaker: { + name: data.speaker_name, + user_id: data.user_id ?? data.speaker_id, + } as WsSpeaker, + startTime: toMs(data.start), + endTime: toMs(data.end), + }; + } + + if (!payload.text) { + return null; + } + + return { + text: payload.text, + isFinal: !!payload.is_final, + speaker: payload.speaker, + startTime: payload.timestamp?.[0]?.[0], + endTime: payload.timestamp?.[payload.timestamp.length - 1]?.[1], + }; +} + export default function RealtimeAsrSession() { const navigate = useNavigate(); const { id } = useParams<{ id: string }>(); @@ -175,7 +201,6 @@ export default function RealtimeAsrSession() { const audioSourceRef = useRef(null); const streamRef = useRef(null); const audioBufferRef = useRef([]); - const recordedSamplesRef = useRef([]); const completeOnceRef = useRef(false); const startedAtRef = useRef(null); @@ -278,13 +303,14 @@ export default function RealtimeAsrSession() { audioSourceRef.current = null; audioContextRef.current = null; audioBufferRef.current = []; - const recordedSamples = recordedSamplesRef.current; - recordedSamplesRef.current = []; setAudioLevel(0); - return recordedSamples.length > 0 ? buildWavBlob(recordedSamples, SAMPLE_RATE) : null; }; const startAudioPipeline = async () => { + if (!window.isSecureContext || !navigator.mediaDevices?.getUserMedia) { + throw new Error("当前浏览器环境不支持麦克风访问。请使用 localhost 或 HTTPS 域名访问系统。"); + } + const stream = await navigator.mediaDevices.getUserMedia({ audio: { channelCount: 1, @@ -300,7 +326,6 @@ export default function RealtimeAsrSession() { audioContextRef.current = audioContext; audioSourceRef.current = source; processorRef.current = processor; - recordedSamplesRef.current = []; processor.onaudioprocess = (event) => { const input = event.inputBuffer.getChannelData(0); @@ -311,7 +336,6 @@ export default function RealtimeAsrSession() { maxAmplitude = amplitude; } audioBufferRef.current.push(input[i]); - recordedSamplesRef.current.push(input[i]); } setAudioLevel(Math.min(100, Math.round(maxAmplitude * 180))); @@ -329,23 +353,28 @@ export default function RealtimeAsrSession() { processor.connect(audioContext.destination); }; - const saveFinalTranscript = async (msg: WsMessage) => { - if (!msg.text || !meetingId) { + const saveFinalTranscript = async (normalized: { + text: string; + speaker?: WsSpeaker; + startTime?: number; + endTime?: number; + }) => { + if (!normalized.text || !meetingId) { return; } - const speaker = resolveSpeaker(msg.speaker); + const speaker = resolveSpeaker(normalized.speaker); const item: RealtimeTranscriptItemDTO = { speakerId: speaker.speakerId, speakerName: speaker.speakerName, - content: msg.text, - startTime: msg.timestamp?.[0]?.[0], - endTime: msg.timestamp?.[msg.timestamp.length - 1]?.[1], + content: normalized.text, + startTime: normalized.startTime, + endTime: normalized.endTime, }; await appendRealtimeTranscripts(meetingId, [item]); }; const handleStart = async () => { - if (!sessionDraft?.wsUrl) { + if (!sessionDraft?.asrModelId) { message.error("未找到实时识别配置,请返回创建页重新进入"); return; } @@ -356,22 +385,24 @@ export default function RealtimeAsrSession() { setConnecting(true); setStatusText("连接识别服务..."); try { - const socket = new WebSocket(sessionDraft.wsUrl); + const socketSessionRes = await openRealtimeMeetingSocketSession(meetingId, { + asrModelId: sessionDraft.asrModelId, + mode: sessionDraft.mode || "2pass", + language: sessionDraft.language || "auto", + useSpkId: sessionDraft.useSpkId, + enablePunctuation: sessionDraft.enablePunctuation !== false, + enableItn: sessionDraft.enableItn !== false, + enableTextRefine: !!sessionDraft.enableTextRefine, + saveAudio: !!sessionDraft.saveAudio, + hotwords: sessionDraft.hotwords || [], + }); + const socketSession = socketSessionRes.data.data; + const socket = new WebSocket(buildRealtimeProxyWsUrl(socketSession)); socket.binaryType = "arraybuffer"; wsRef.current = socket; socket.onopen = async () => { - socket.send(JSON.stringify({ - mode: sessionDraft.mode || "2pass", - chunk_size: [0, 8, 4], - chunk_interval: 4, - wav_name: `meeting_${meetingId}`, - is_speaking: true, - speaker_name: null, - use_spk_id: sessionDraft.useSpkId, - save_audio: false, - hotwords: sessionDraft.hotwords, - })); + socket.send(JSON.stringify(socketSession.startMessage || {})); await startAudioPipeline(); startedAtRef.current = Date.now(); setConnecting(false); @@ -382,29 +413,36 @@ export default function RealtimeAsrSession() { socket.onmessage = (event) => { try { const payload = JSON.parse(event.data) as WsMessage; - if (!payload.text) { + if (payload.code && payload.message) { + setStatusText(payload.message); + message.error(payload.message); return; } - const speaker = resolveSpeaker(payload.speaker); - if (payload.is_final) { + const normalized = normalizeWsMessage(payload); + if (!normalized) { + return; + } + + const speaker = resolveSpeaker(normalized.speaker); + if (normalized.isFinal) { setTranscripts((prev) => [ ...prev, { id: `${Date.now()}-${Math.random()}`, speakerName: speaker.speakerName, userId: speaker.userId, - text: payload.text, - startTime: payload.timestamp?.[0]?.[0], - endTime: payload.timestamp?.[payload.timestamp.length - 1]?.[1], + text: normalized.text, + startTime: normalized.startTime, + endTime: normalized.endTime, final: true, }, ]); setStreamingText(""); setStreamingSpeaker("Unknown"); - void saveFinalTranscript(payload); + void saveFinalTranscript(normalized); } else { - setStreamingText(payload.text); + setStreamingText(normalized.text); setStreamingSpeaker(speaker.speakerName); } } catch { @@ -423,10 +461,10 @@ export default function RealtimeAsrSession() { setConnecting(false); setRecording(false); }; - } catch { + } catch (error) { setConnecting(false); setStatusText("启动失败"); - message.error("启动实时识别失败"); + message.error(error instanceof Error ? error.message : "启动实时识别失败"); } }; @@ -445,20 +483,10 @@ export default function RealtimeAsrSession() { wsRef.current?.close(); wsRef.current = null; - const audioBlob = await shutdownAudioPipeline(); - let uploadedAudioUrl: string | undefined; - if (audioBlob) { - try { - const file = new File([audioBlob], `meeting-${meetingId}.wav`, { type: audioBlob.type || "audio/wav" }); - const uploadRes = await uploadAudio(file); - uploadedAudioUrl = uploadRes.data.data; - } catch { - message.warning("会议音频上传失败,已保留转录内容"); - } - } + await shutdownAudioPipeline(); try { - await completeRealtimeMeeting(meetingId, uploadedAudioUrl ? { audioUrl: uploadedAudioUrl } : {}); + await completeRealtimeMeeting(meetingId, {}); sessionStorage.removeItem(getSessionKey(meetingId)); setStatusText("已提交总结任务"); message.success("实时会议已结束,正在生成总结"); @@ -499,6 +527,80 @@ export default function RealtimeAsrSession() { return (
+ navigate("/meeting-live-create")}>返回创建页} /> @@ -529,7 +631,7 @@ export default function RealtimeAsrSession() { LIVE SESSION 会中实时识别 - 会中页只保留控制区和实时转写流。 + 会中页面只保留控制区和实时转写流。
@@ -586,37 +688,35 @@ export default function RealtimeAsrSession() {
{transcripts.length === 0 && !streamingText ? (
- +
) : ( {transcripts.map((item) => ( -
-
- {item.speakerName.slice(0, 1).toUpperCase()} -
-
- - {item.speakerName} +
+
{formatTranscriptTime(item.startTime)}
+
+
+ } className="transcript-avatar" /> + {item.speakerName} {item.userId ? UID: {item.userId} : null} - }>{formatTranscriptTime(item.startTime)} - {formatTranscriptTime(item.endTime)} - -
{item.text}
+ {formatTranscriptTime(item.startTime)} - {formatTranscriptTime(item.endTime)} +
+
{item.text}
))} {streamingText ? ( -
-
- {streamingSpeaker.slice(0, 1).toUpperCase()} -
-
- - {streamingSpeaker} +
+
--:--
+
+
+ } className="transcript-avatar" /> + {streamingSpeaker} 流式草稿 - -
{streamingText}
+
+
{streamingText}
) : null} @@ -631,3 +731,4 @@ export default function RealtimeAsrSession() {
); } + diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index e7f0c4a..93599d9 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -16,7 +16,11 @@ export default defineConfig({ proxy: { "/auth": "http://localhost:8081", "/sys": "http://localhost:8081", - "/api": "http://localhost:8081" + "/api": "http://localhost:8081", + "/ws": { + target: "ws://localhost:8081", + ws: true + } } } });