增加语音合成功能

This commit is contained in:
RockYang 2025-04-01 17:03:51 +08:00
parent afb9193985
commit ff69cb231a
20 changed files with 216 additions and 88 deletions

View File

@ -44,6 +44,7 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
KeyId int `json:"key_id,omitempty"`
CreatedAt int64 `json:"created_at"`
Type string `json:"type"`
Options map[string]string `json:"options"`
}
if err := c.ShouldBindJSON(&data); err != nil {
resp.ERROR(c, types.InvalidArgs)
@ -59,7 +60,6 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
item.Name = data.Name
item.Value = data.Value
item.Enabled = data.Enabled
item.SortNum = data.SortNum
item.Open = data.Open
item.Power = data.Power
item.MaxTokens = data.MaxTokens
@ -67,6 +67,7 @@ func (h *ChatModelHandler) Save(c *gin.Context) {
item.Temperature = data.Temperature
item.KeyId = data.KeyId
item.Type = data.Type
item.Options = utils.JsonEncode(data.Options)
var res *gorm.DB
if data.Id > 0 {
res = h.DB.Save(&item)

View File

@ -25,6 +25,7 @@ import (
"io"
"net/http"
"net/url"
"os"
"strings"
"time"
"unicode/utf8"
@ -505,6 +506,7 @@ func (h *ChatHandler) saveChatHistory(
// 文本生成语音
func (h *ChatHandler) TextToSpeech(c *gin.Context) {
var data struct {
ModelId int `json:"model_id"`
Text string `json:"text"`
}
if err := c.ShouldBindJSON(&data); err != nil {
@ -512,40 +514,82 @@ func (h *ChatHandler) TextToSpeech(c *gin.Context) {
return
}
// 调用 DeepSeek 的 API 接口
var apiKey model.ApiKey
h.DB.Where("type", "chat").Where("enabled", true).First(&apiKey)
if apiKey.Id == 0 {
resp.ERROR(c, "no available key, please import key")
textHash := utils.Sha256(fmt.Sprintf("%d/%s", data.ModelId, data.Text))
audioFile := fmt.Sprintf("%s/audio", h.App.Config.StaticDir)
if _, err := os.Stat(audioFile); err != nil {
os.MkdirAll(audioFile, 0755)
}
audioFile = fmt.Sprintf("%s/%s.mp3", audioFile, textHash)
if _, err := os.Stat(audioFile); err == nil {
// 设置响应头
c.Header("Content-Type", "audio/mpeg")
c.Header("Content-Disposition", "attachment; filename=speech.mp3")
c.File(audioFile)
return
}
// 查询模型
var chatModel model.ChatModel
err := h.DB.Where("id", data.ModelId).First(&chatModel).Error
if err != nil {
resp.ERROR(c, "找不到语音模型")
return
}
// Resolve the API key: prefer the key bound to the model, otherwise fall back to any enabled TTS key
var apiKey model.ApiKey
if chatModel.KeyId > 0 {
h.DB.Where("id", chatModel.KeyId).First(&apiKey)
}
if apiKey.Id == 0 {
h.DB.Where("type", "tts").Where("enabled", true).First(&apiKey)
}
if apiKey.Id == 0 {
resp.ERROR(c, "no TTS API key, please import key")
return
}
logger.Debugf("chatModel: %+v, apiKey: %+v", chatModel, apiKey)
// 调用 openai tts api
config := openai.DefaultConfig(apiKey.Value)
config.BaseURL = apiKey.ApiURL
config.BaseURL = apiKey.ApiURL + "/v1"
client := openai.NewClientWithConfig(config)
voice := openai.VoiceAlloy
var options map[string]string
err = utils.JsonDecode(chatModel.Options, &options)
if err == nil {
voice = openai.SpeechVoice(options["voice"])
}
req := openai.CreateSpeechRequest{
Model: openai.TTSModel1,
Model: openai.SpeechModel(chatModel.Value),
Input: data.Text,
Voice: openai.VoiceAlloy,
Voice: voice,
}
audioData, err := client.CreateSpeech(context.Background(), req)
if err != nil {
logger.Error("failed to create speech: ", err)
resp.ERROR(c, "failed to create speech")
resp.ERROR(c, err.Error())
return
}
// 先将音频数据读取到内存
audioBytes, err := io.ReadAll(audioData)
if err != nil {
resp.ERROR(c, err.Error())
return
}
// 保存到音频文件
err = os.WriteFile(audioFile, audioBytes, 0644)
if err != nil {
logger.Error("failed to save audio file: ", err)
}
// 设置响应头
c.Header("Content-Type", "audio/mpeg")
c.Header("Content-Disposition", "attachment; filename=speech.mp3")
// 将音频数据写入响应
_, err = io.Copy(c.Writer, audioData)
if err != nil {
logger.Error("failed to write audio data: ", err)
resp.ERROR(c, "failed to write audio data")
return
}
// 直接写入完整的音频数据到响应
c.Writer.Write(audioBytes)
}

View File

@ -30,14 +30,17 @@ func NewChatModelHandler(app *core.AppServer, db *gorm.DB) *ChatModelHandler {
func (h *ChatModelHandler) List(c *gin.Context) {
var items []model.ChatModel
var chatModels = make([]vo.ChatModel, 0)
session := h.DB.Session(&gorm.Session{}).Where("type", "chat").Where("enabled", true)
session := h.DB.Session(&gorm.Session{}).Where("enabled", true)
t := c.Query("type")
logger.Info("type: ", t)
if t != "" {
session = session.Where("type", t)
} else {
session = session.Where("type", "chat")
}
session = session.Where("open", true)
if h.IsLogin(c) {
if h.IsLogin(c) && t == "chat" {
user, _ := h.GetLoginUser(c)
var models []int
err := utils.JsonDecode(user.ChatModels, &models)
@ -48,7 +51,7 @@ func (h *ChatModelHandler) List(c *gin.Context) {
}
res := session.Order("sort_num ASC").Find(&items)
res := session.Debug().Order("sort_num ASC").Find(&items)
if res.Error == nil {
for _, item := range items {
var cm vo.ChatModel

View File

@ -13,4 +13,5 @@ type ChatModel struct {
Temperature float32 // 模型温度
KeyId int // 绑定 API KEY ID
Type string // 模型类型
Options string // 模型选项
}

View File

@ -13,5 +13,6 @@ type ChatModel struct {
Temperature float32 `json:"temperature"` // 模型温度
KeyId int `json:"key_id,omitempty"`
KeyName string `json:"key_name"`
Options map[string]string `json:"options"`
Type string `json:"type"`
}

View File

@ -0,0 +1 @@
ALTER TABLE `chatgpt_chat_models` ADD `options` TEXT NOT NULL COMMENT '模型自定义选项' AFTER `key_id`;

View File

@ -198,7 +198,7 @@ const isExternalImg = (link, files) => {
width 100%
padding-bottom: 1.5rem;
padding-top: 1.5rem;
border-bottom: 0.5px solid var(--el-border-color);
// border-bottom: 0.5px solid var(--el-border-color);
.chat-line-inner {
display flex;

View File

@ -1,4 +1,5 @@
<template>
<div class="chat-reply">
<div class="chat-line chat-line-reply-list" v-if="listStyle === 'list'">
<div class="chat-line-inner">
<div class="chat-icon">
@ -7,10 +8,8 @@
<div class="chat-item">
<div class="content-wrapper" v-html="md.render(processContent(data.content))"></div>
<div class="bar" v-if="data.created_at">
<span class="bar-item"
><el-icon><Clock /></el-icon> {{ dateFormat(data.created_at) }}</span
>
<div class="bar flex" v-if="data.created_at">
<span class="bar-item">{{ dateFormat(data.created_at) }}</span>
<span class="bar-item">tokens: {{ data.tokens }}</span>
<span class="bar-item">
<el-tooltip class="box-item" effect="dark" content="复制回答" placement="bottom">
@ -19,16 +18,17 @@
</el-icon>
</el-tooltip>
</span>
<span v-if="!readOnly">
<span v-if="!readOnly" class="flex">
<span class="bar-item" @click="reGenerate(data.prompt)">
<el-tooltip class="box-item" effect="dark" content="重新生成" placement="bottom">
<el-icon><Refresh /></el-icon>
</el-tooltip>
</span>
<span class="bar-item" @click="synthesis(data.content)">
<span class="bar-item">
<el-tooltip class="box-item" effect="dark" content="生成语音朗读" placement="bottom">
<i class="iconfont icon-speaker"></i>
<i class="iconfont icon-speaker" v-if="!isPlaying" @click="synthesis(data.content)"></i>
<el-image class="voice-icon" :src="playIcon" v-else />
</el-tooltip>
</span>
</span>
@ -59,9 +59,7 @@
<div class="content" v-html="md.render(processContent(data.content))"></div>
</div>
<div class="bar" v-if="data.created_at">
<span class="bar-item"
><el-icon><Clock /></el-icon> {{ dateFormat(data.created_at) }}</span
>
<span class="bar-item">{{ dateFormat(data.created_at) }}</span>
<!-- <span class="bar-item">tokens: {{ data.tokens }}</span>-->
<span class="bar-item bg">
<el-tooltip class="box-item" effect="dark" content="复制回答" placement="bottom">
@ -70,24 +68,29 @@
</el-icon>
</el-tooltip>
</span>
<span v-if="!readOnly">
<span v-if="!readOnly" class="flex">
<span class="bar-item bg" @click="reGenerate(data.prompt)">
<el-tooltip class="box-item" effect="dark" content="重新生成" placement="bottom">
<el-icon><Refresh /></el-icon>
</el-tooltip>
</span>
<span class="bar-item bg" @click="synthesis(data.content)">
<el-tooltip class="box-item" effect="dark" content="生成语音朗读" placement="bottom">
<i class="iconfont icon-speaker"></i>
<span class="bar-item bg">
<el-tooltip class="box-item" effect="dark" content="生成语音朗读" placement="bottom" v-if="!isPlaying">
<i class="iconfont icon-speaker" @click="synthesis(data.content)"></i>
</el-tooltip>
<el-tooltip class="box-item" effect="dark" content="暂停播放" placement="bottom" v-else>
<el-image class="voice-icon" :src="playIcon" @click="stopSynthesis()" />
</el-tooltip>
</span>
<img src="/images/voice.gif" />
</span>
</div>
</div>
</div>
</div>
<audio ref="audio" @ended="isPlaying = false" />
</div>
</template>
<script setup>
@ -99,8 +102,8 @@ import emoji from "markdown-it-emoji";
import mathjaxPlugin from "markdown-it-mathjax3";
import MarkdownIt from "markdown-it";
import { httpPost } from "@/utils/http";
import RippleButton from "./ui/RippleButton.vue";
import { ref } from "vue";
import { useSharedStore } from "@/store/sharedata";
// eslint-disable-next-line no-undef,no-unused-vars
const props = defineProps({
data: {
@ -122,6 +125,11 @@ const props = defineProps({
},
});
const audio = ref(null);
const isPlaying = ref(false);
const playIcon = ref("/images/voice.gif");
const store = useSharedStore();
const md = new MarkdownIt({
breaks: true,
html: true,
@ -158,34 +166,38 @@ if (!props.data.icon) {
}
const synthesis = (text) => {
console.log(text);
//
httpPost("/api/chat/tts", { text }, { responseType: 'blob' }).then(response => {
isPlaying.value = true
httpPost("/api/chat/tts", { text:text, model_id:store.ttsModel }, { responseType: 'blob' }).then(response => {
// Blob MIME
const blob = new Blob([response], { type: 'audio/wav' });
// URL
const blob = new Blob([response], { type: 'audio/mpeg' }); // MP3
const audioUrl = URL.createObjectURL(blob);
//
const audio = new Audio(audioUrl);
//
audio.play().then(() => {
audio.value.src = audioUrl;
audio.value.play().then(() => {
// URL
URL.revokeObjectURL(audioUrl);
}).catch(err => {
console.error('播放音频失败:', err);
}).catch(() => {
ElMessage.error('音频播放失败,请检查浏览器是否支持该音频格式');
isPlaying.value = false
});
}).catch(err => {
console.error('语音合成请求失败:', err);
ElMessage.error('语音合成失败,请稍后重试');
}).catch(e => {
ElMessage.error('语音合成失败:' + e.message);
isPlaying.value = false
});
};
// Stop the speech that is currently playing: clear the playing flag, pause the
// <audio> element held by the `audio` template ref and rewind it to the start.
const stopSynthesis = () => {
  const player = audio.value;
  isPlaying.value = false;
  player.pause();
  player.currentTime = 0;
};
//
const reGenerate = (prompt) => {
console.log(prompt);
emits("regen", prompt);
};
</script>
<style lang="stylus">
@ -307,7 +319,8 @@ const reGenerate = (prompt) => {
width 100%
padding-bottom: 1.5rem;
padding-top: 1.5rem;
border-bottom: 0.5px solid var(--el-border-color);
border: 1px solid var(--el-border-color);
border-radius: 10px;
.chat-line-inner {
display flex;
@ -347,10 +360,18 @@ const reGenerate = (prompt) => {
padding 10px 10px 10px 0;
.bar-item {
padding 3px 5px;
margin-right 10px;
border-radius 5px;
cursor pointer
display flex
align-items center
justify-content center
height 26px
.voice-icon {
width 20px
height 20px
}
.el-icon {
position relative
@ -426,11 +447,21 @@ const reGenerate = (prompt) => {
.bar {
padding 10px 10px 10px 0;
display flex
.bar-item {
padding 3px 5px;
margin-right 10px;
border-radius 5px;
display flex
align-items center
justify-content center
height 26px
.voice-icon {
width 20px
height 20px
}
.el-icon {
position relative

View File

@ -18,19 +18,28 @@
<el-form-item label="流式输出:">
<el-switch v-model="data.stream" @change="(val) => {store.setChatStream(val)}" />
</el-form-item>
<el-form-item label="语音音色:">
<el-select v-model="data.ttsModel" placeholder="请选择语音音色" @change="changeTTSModel">
<el-option v-for="v in models" :value="v.id" :label="v.name" :key="v.id">
{{ v.name }}
</el-option>
</el-select>
</el-form-item>
</el-form>
</div>
</el-dialog>
</template>
<script setup>
import {computed, ref} from "vue"
import {computed, ref, onMounted} from "vue"
import {useSharedStore} from "@/store/sharedata";
import {httpGet} from "@/utils/http";
const store = useSharedStore();
const data = ref({
style: store.chatListStyle,
stream: store.chatStream,
ttsModel: store.ttsModel,
})
// eslint-disable-next-line no-undef
const props = defineProps({
@ -44,6 +53,20 @@ const emits = defineEmits(['hide']);
// Ask the parent to hide this dialog by emitting `hide` with a `false` payload.
const close = () => {
  emits('hide', false);
};
const models = ref([]);
// Load the TTS models when the component is mounted and, if the user has not
// chosen one yet, default to the first model in the list.
onMounted(() => {
  httpGet("/api/model/list?type=tts").then((res) => {
    // Fall back to an empty list so the checks below never touch a missing payload.
    models.value = res.data || [];
    // `data` is a ref, so the current selection lives on `data.value`.
    // Reading `data.ttsModel` is always undefined, which made this branch run
    // on every mount and overwrite the user's saved choice.
    if (!data.value.ttsModel && models.value.length > 0) {
      const firstId = models.value[0].id;
      // Keep the form field and the persisted store value in sync.
      data.value.ttsModel = firstId;
      store.setTtsModel(firstId);
    }
  });
});
// Change handler of the TTS model select: store the newly selected value
// through the shared store's setTtsModel action.
const changeTTSModel = (modelId) => {
  store.setTtsModel(modelId);
};
</script>
<style lang="stylus" scoped>

View File

@ -10,6 +10,7 @@ export const useSharedStore = defineStore("shared", {
theme: Storage.get("theme", "light"),
isLogin: false,
chatListExtend: Storage.get("chat_list_extend", true),
ttsModel: Storage.get("tts_model", ""),
}),
getters: {},
actions: {
@ -74,5 +75,10 @@ export const useSharedStore = defineStore("shared", {
setIsLogin(value) {
this.isLogin = value;
},
setTtsModel(value) {
this.ttsModel = value;
Storage.set("tts_model", value);
},
},
});

View File

@ -464,7 +464,7 @@ onUnmounted(() => {
//
const initData = () => {
//
httpGet("/api/model/list")
httpGet("/api/model/list?type=chat")
.then((res) => {
models.value = res.data;
if (!modelID.value) {

View File

@ -142,6 +142,7 @@ const types = ref([
{ label: "Luma视频", value: "luma" },
{ label: "可灵视频", value: "keling" },
{ label: "Realtime API", value: "realtime" },
{ label: "语音合成", value: "tts" },
{ label: "其他", value: "other" },
]);
const isEdit = ref(false);

View File

@ -126,6 +126,16 @@
</el-form-item>
</div>
<div v-if="item.type === 'tts'">
<el-form-item label="音色" prop="voice">
<el-select v-model="item.options.voice" placeholder="请选择音色">
<el-option v-for="v in voices" :value="v.value" :label="v.label" :key="v.value">
{{ v.label }}
</el-option>
</el-select>
</el-form-item>
</div>
<el-form-item label="绑定API-KEY" prop="apikey">
<el-select v-model="item.key_id" placeholder="请选择 API KEY" filterable clearable>
<el-option v-for="v in apiKeys" :value="v.id" :label="v.name" :key="v.id">
@ -191,6 +201,15 @@ const formRef = ref(null);
const type = ref([
{ label: "聊天", value: "chat" },
{ label: "绘图", value: "img" },
{ label: "语音", value: "tts" },
]);
// Voices selectable for a TTS model (stored in the model's `options.voice`).
// "alloy" is listed as well: it is one of the standard OpenAI TTS voices and
// appears to be the server-side default, so it should be selectable explicitly.
const voices = ref([
  { label: "Alloy", value: "alloy" },
  { label: "Echo", value: "echo" },
  { label: "Fable", value: "fable" },
  { label: "Onyx", value: "onyx" },
  { label: "Nova", value: "nova" },
  { label: "Shimmer", value: "shimmer" },
]);
// API KEY
@ -270,7 +289,7 @@ onUnmounted(() => {
const add = function () {
title.value = "新增模型";
showDialog.value = true;
item.value = { enabled: true, power: 1, open: true, max_tokens: 1024, max_context: 8192, temperature: 0.9 };
item.value = { enabled: true, power: 1, open: true, max_tokens: 1024, max_context: 8192, temperature: 0.9, options: {} };
};
const edit = function (row) {
@ -282,9 +301,6 @@ const edit = function (row) {
const save = function () {
formRef.value.validate((valid) => {
item.value.temperature = parseFloat(item.value.temperature);
if (!item.value.sort_num) {
item.value.sort_num = items.value.length;
}
if (valid) {
showDialog.value = false;
item.value.key_id = parseInt(item.value.key_id);