imetting/backend/sql/migrations/split_llm_audio_model_table...

171 lines
7.0 KiB
PL/PgSQL

-- Migration: split LLM and audio model configs into dedicated tables
-- Created at: 2026-03-12
-- Schema first, transaction second.
-- In MySQL, CREATE TABLE causes an implicit commit. A transaction opened before
-- the DDL would already be closed by the time the data-migration INSERTs run,
-- leaving them non-atomic. The DDL therefore runs on its own and the
-- transaction is opened only after both tables exist.

-- LLM model settings: one row per model_code (unique), with the generation
-- parameters stored in dedicated llm_* columns.
CREATE TABLE IF NOT EXISTS `llm_model_config` (
    `config_id` bigint(20) NOT NULL AUTO_INCREMENT,
    `model_code` varchar(128) NOT NULL,
    `model_name` varchar(255) NOT NULL,
    `provider` varchar(64) DEFAULT NULL,
    `endpoint_url` varchar(512) DEFAULT NULL,
    `api_key` varchar(512) DEFAULT NULL,
    `llm_model_name` varchar(128) NOT NULL,
    `llm_timeout` int(11) NOT NULL DEFAULT 120,
    `llm_temperature` decimal(5,2) NOT NULL DEFAULT 0.70,
    `llm_top_p` decimal(5,2) NOT NULL DEFAULT 0.90,
    `llm_max_tokens` int(11) NOT NULL DEFAULT 2048,
    `llm_system_prompt` text DEFAULT NULL,
    `description` varchar(500) DEFAULT NULL,
    `is_active` tinyint(1) NOT NULL DEFAULT 1,
    `is_default` tinyint(1) NOT NULL DEFAULT 0,
    `created_at` datetime DEFAULT CURRENT_TIMESTAMP,
    `updated_at` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`config_id`),
    UNIQUE KEY `uk_llm_model_code` (`model_code`),
    KEY `idx_llm_active` (`is_active`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Audio model settings: one row per model_code (unique). `audio_scene` tags the
-- row as 'asr' or 'voiceprint'; the asr_* and vp_* column groups are all
-- nullable, so a row only needs to fill the group that applies to it.
CREATE TABLE IF NOT EXISTS `audio_model_config` (
    `config_id` bigint(20) NOT NULL AUTO_INCREMENT,
    `model_code` varchar(128) NOT NULL,
    `model_name` varchar(255) NOT NULL,
    `audio_scene` varchar(32) NOT NULL COMMENT 'asr / voiceprint',
    `provider` varchar(64) DEFAULT NULL,
    `endpoint_url` varchar(512) DEFAULT NULL,
    `api_key` varchar(512) DEFAULT NULL,
    `asr_model_name` varchar(128) DEFAULT NULL,
    `asr_vocabulary_id` varchar(255) DEFAULT NULL,
    `asr_speaker_count` int(11) DEFAULT NULL,
    `asr_language_hints` varchar(255) DEFAULT NULL,
    `asr_disfluency_removal_enabled` tinyint(1) DEFAULT NULL,
    `asr_diarization_enabled` tinyint(1) DEFAULT NULL,
    `vp_template_text` text DEFAULT NULL,
    `vp_duration_seconds` int(11) DEFAULT NULL,
    `vp_sample_rate` int(11) DEFAULT NULL,
    `vp_channels` int(11) DEFAULT NULL,
    `vp_max_size_bytes` bigint(20) DEFAULT NULL,
    `description` varchar(500) DEFAULT NULL,
    `is_active` tinyint(1) NOT NULL DEFAULT 1,
    `is_default` tinyint(1) NOT NULL DEFAULT 0,
    `created_at` datetime DEFAULT CURRENT_TIMESTAMP,
    `updated_at` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`config_id`),
    UNIQUE KEY `uk_audio_model_code` (`model_code`),
    KEY `idx_audio_scene` (`audio_scene`),
    KEY `idx_audio_active` (`is_active`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Data migration starts here; the INSERT statements that follow run inside
-- this transaction and are finalized by the COMMIT at the end of the script.
START TRANSACTION;
-- Migrate LLM rows: copy every ai_model_configs row with model_type = 'llm'
-- into llm_model_config.
-- Each llm_* value is taken from the dedicated source column when it is set,
-- otherwise from the matching key in config_json, otherwise from a literal
-- default (llm_system_prompt has no default and may stay NULL).
-- A row whose model_code already exists in the target is overwritten in place
-- through ON DUPLICATE KEY UPDATE instead of being inserted again.
--
-- Why NULLIF(..., 'null'): JSON_UNQUOTE(JSON_EXTRACT(...)) renders a JSON null
-- as the string 'null', not SQL NULL. Without NULLIF the COALESCE fallback is
-- skipped: text columns would store 'null' and the numeric CASTs would yield 0
-- (or fail under strict SQL mode).
INSERT INTO `llm_model_config` (
    model_code,
    model_name,
    provider,
    endpoint_url,
    api_key,
    llm_model_name,
    llm_timeout,
    llm_temperature,
    llm_top_p,
    llm_max_tokens,
    llm_system_prompt,
    description,
    is_active,
    is_default
)
SELECT
    src.model_code,
    src.model_name,
    src.provider,
    src.endpoint_url,
    src.api_key,
    COALESCE(
        src.llm_model_name,
        NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.model_name')), 'null'),
        'qwen-plus'
    ),
    COALESCE(
        src.llm_timeout,
        CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.time_out')), 'null') AS UNSIGNED),
        120
    ),
    COALESCE(
        src.llm_temperature,
        CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.temperature')), 'null') AS DECIMAL(5,2)),
        0.70
    ),
    COALESCE(
        src.llm_top_p,
        CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.top_p')), 'null') AS DECIMAL(5,2)),
        0.90
    ),
    COALESCE(
        src.llm_max_tokens,
        CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.max_tokens')), 'null') AS UNSIGNED),
        2048
    ),
    COALESCE(
        src.llm_system_prompt,
        NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.system_prompt')), 'null')
    ),
    src.description,
    src.is_active,
    src.is_default
FROM ai_model_configs AS src
WHERE src.model_type = 'llm'
ON DUPLICATE KEY UPDATE
    model_name = VALUES(model_name),
    provider = VALUES(provider),
    endpoint_url = VALUES(endpoint_url),
    api_key = VALUES(api_key),
    llm_model_name = VALUES(llm_model_name),
    llm_timeout = VALUES(llm_timeout),
    llm_temperature = VALUES(llm_temperature),
    llm_top_p = VALUES(llm_top_p),
    llm_max_tokens = VALUES(llm_max_tokens),
    llm_system_prompt = VALUES(llm_system_prompt),
    description = VALUES(description),
    is_active = VALUES(is_active),
    is_default = VALUES(is_default);
-- Migrate the audio recognition row: copy the ai_model_configs row whose
-- model_code is 'audio_model' into audio_model_config with audio_scene = 'asr'.
-- Each asr_* value is taken from the dedicated source column when it is set,
-- otherwise from config_json, otherwise from a literal default where one is
-- given. A row whose model_code already exists in the target is overwritten in
-- place through ON DUPLICATE KEY UPDATE (audio_scene itself is not rewritten).
--
-- Why NULLIF(..., 'null'): a JSON null extracted from config_json is rendered
-- as the string 'null', not SQL NULL, which would defeat the COALESCE fallback.
INSERT INTO `audio_model_config` (
    model_code,
    model_name,
    audio_scene,
    provider,
    endpoint_url,
    api_key,
    asr_model_name,
    asr_vocabulary_id,
    asr_speaker_count,
    asr_language_hints,
    asr_disfluency_removal_enabled,
    asr_diarization_enabled,
    description,
    is_active,
    is_default
)
SELECT
    src.model_code,
    src.model_name,
    'asr',
    src.provider,
    src.endpoint_url,
    src.api_key,
    COALESCE(
        src.asr_model_name,
        NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.model')), 'null'),
        'paraformer-v2'
    ),
    COALESCE(
        src.asr_vocabulary_id,
        NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.vocabulary_id')), 'null')
    ),
    COALESCE(
        src.asr_speaker_count,
        CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.speaker_count')), 'null') AS UNSIGNED),
        10
    ),
    -- Flatten the JSON array into a comma-separated list. MySQL serializes
    -- arrays as ["zh", "en"] (space after each comma), so ', ' is collapsed to
    -- ',' after the quotes and brackets are stripped; the stored value then
    -- matches the 'zh,en' default format.
    COALESCE(
        src.asr_language_hints,
        NULLIF(
            REPLACE(
                REPLACE(
                    REPLACE(
                        REPLACE(JSON_EXTRACT(src.config_json, '$.language_hints'), '"', ''),
                        '[', ''),
                    ']', ''),
                ', ', ','),
            'null'),
        'zh,en'
    ),
    -- Boolean flags: 'true' (any letter case) or '1' maps to 1; anything else,
    -- including a missing key, maps to 0.
    COALESCE(
        src.asr_disfluency_removal_enabled,
        CASE LOWER(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.disfluency_removal_enabled')))
            WHEN 'true' THEN 1
            WHEN '1' THEN 1
            ELSE 0
        END
    ),
    COALESCE(
        src.asr_diarization_enabled,
        CASE LOWER(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.diarization_enabled')))
            WHEN 'true' THEN 1
            WHEN '1' THEN 1
            ELSE 0
        END
    ),
    src.description,
    src.is_active,
    src.is_default
FROM ai_model_configs AS src
WHERE src.model_code = 'audio_model'
ON DUPLICATE KEY UPDATE
    model_name = VALUES(model_name),
    provider = VALUES(provider),
    endpoint_url = VALUES(endpoint_url),
    api_key = VALUES(api_key),
    asr_model_name = VALUES(asr_model_name),
    asr_vocabulary_id = VALUES(asr_vocabulary_id),
    asr_speaker_count = VALUES(asr_speaker_count),
    asr_language_hints = VALUES(asr_language_hints),
    asr_disfluency_removal_enabled = VALUES(asr_disfluency_removal_enabled),
    asr_diarization_enabled = VALUES(asr_diarization_enabled),
    description = VALUES(description),
    is_active = VALUES(is_active),
    is_default = VALUES(is_default);
-- Migrate the voiceprint row: copy the ai_model_configs row whose model_code is
-- 'voiceprint_model' into audio_model_config with audio_scene = 'voiceprint'.
-- All vp_* values are read from config_json only; a missing key leaves the
-- column NULL. A row whose model_code already exists in the target is
-- overwritten in place through ON DUPLICATE KEY UPDATE (audio_scene itself is
-- not rewritten).
--
-- Why NULLIF(..., 'null'): a JSON null extracted from config_json is rendered
-- as the string 'null', not SQL NULL. Without NULLIF, vp_template_text would
-- store 'null' and the numeric CASTs would yield 0 (or fail under strict SQL
-- mode) instead of leaving the column NULL.
INSERT INTO `audio_model_config` (
    model_code,
    model_name,
    audio_scene,
    provider,
    endpoint_url,
    api_key,
    vp_template_text,
    vp_duration_seconds,
    vp_sample_rate,
    vp_channels,
    vp_max_size_bytes,
    description,
    is_active,
    is_default
)
SELECT
    src.model_code,
    src.model_name,
    'voiceprint',
    src.provider,
    src.endpoint_url,
    src.api_key,
    NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.template_text')), 'null'),
    CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.duration_seconds')), 'null') AS UNSIGNED),
    CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.sample_rate')), 'null') AS UNSIGNED),
    CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.channels')), 'null') AS UNSIGNED),
    -- Source key is 'voiceprint_max_size'; it lands in vp_max_size_bytes.
    CAST(NULLIF(JSON_UNQUOTE(JSON_EXTRACT(src.config_json, '$.voiceprint_max_size')), 'null') AS UNSIGNED),
    src.description,
    src.is_active,
    src.is_default
FROM ai_model_configs AS src
WHERE src.model_code = 'voiceprint_model'
ON DUPLICATE KEY UPDATE
    model_name = VALUES(model_name),
    provider = VALUES(provider),
    endpoint_url = VALUES(endpoint_url),
    api_key = VALUES(api_key),
    vp_template_text = VALUES(vp_template_text),
    vp_duration_seconds = VALUES(vp_duration_seconds),
    vp_sample_rate = VALUES(vp_sample_rate),
    vp_channels = VALUES(vp_channels),
    vp_max_size_bytes = VALUES(vp_max_size_bytes),
    description = VALUES(description),
    is_active = VALUES(is_active),
    is_default = VALUES(is_default);

-- Finalize the migration.
COMMIT;