diff --git a/api/go.mod b/api/go.mod index 293fcbf9..e4f18276 100644 --- a/api/go.mod +++ b/api/go.mod @@ -50,6 +50,7 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/pilu/config v0.0.0-20131214182432-3eb99e6c0b9a // indirect github.com/pilu/fresh v0.0.0-20240621171608-8d1fef547a99 // indirect + github.com/sashabaranov/go-openai v1.38.1 // indirect github.com/tklauser/go-sysconf v0.3.13 // indirect github.com/tklauser/numcpus v0.7.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect diff --git a/api/go.sum b/api/go.sum index 1de7ee25..38297cab 100644 --- a/api/go.sum +++ b/api/go.sum @@ -203,6 +203,8 @@ github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUA github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/sashabaranov/go-openai v1.38.1 h1:TtZabbFQZa1nEni/IhVtDF/WQjVqDgd+cWR5OeddzF8= +github.com/sashabaranov/go-openai v1.38.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= diff --git a/api/handler/chat_handler.go b/api/handler/chat_handler.go index f24fd25e..8b171182 100644 --- a/api/handler/chat_handler.go +++ b/api/handler/chat_handler.go @@ -22,15 +22,16 @@ import ( "geekai/utils" "geekai/utils/resp" "html/template" + "io" "net/http" "net/url" - "regexp" "strings" "time" "unicode/utf8" "github.com/gin-gonic/gin" "github.com/go-redis/redis/v8" + "github.com/sashabaranov/go-openai" "gorm.io/gorm" ) @@ -501,28 +502,50 @@ func (h *ChatHandler) saveChatHistory( } } -// 将AI回复消息中生成的图片链接下载到本地 -func (h *ChatHandler) 
extractImgUrl(text string) string { - pattern := `!\[([^\]]*)]\(([^)]+)\)` - re := regexp.MustCompile(pattern) - matches := re.FindAllStringSubmatch(text, -1) - - // 下载图片并替换链接地址 - for _, match := range matches { - imageURL := match[2] - logger.Debug(imageURL) - // 对于相同地址的图片,已经被替换了,就不再重复下载了 - if !strings.Contains(text, imageURL) { - continue - } - - newImgURL, err := h.uploadManager.GetUploadHandler().PutUrlFile(imageURL, false) - if err != nil { - logger.Error("error with download image: ", err) - continue - } - - text = strings.ReplaceAll(text, imageURL, newImgURL) +// 文本生成语音 +func (h *ChatHandler) TextToSpeech(c *gin.Context) { + var data struct { + Text string `json:"text"` + } + if err := c.ShouldBindJSON(&data); err != nil { + resp.ERROR(c, types.InvalidArgs) + return + } + + // 调用 DeepSeek 的 API 接口 + var apiKey model.ApiKey + h.DB.Where("type", "chat").Where("enabled", true).First(&apiKey) + if apiKey.Id == 0 { + resp.ERROR(c, "no available key, please import key") + return + } + + // 调用 openai tts api + config := openai.DefaultConfig(apiKey.Value) + config.BaseURL = apiKey.ApiURL + client := openai.NewClientWithConfig(config) + req := openai.CreateSpeechRequest{ + Model: openai.TTSModel1, + Input: data.Text, + Voice: openai.VoiceAlloy, + } + + audioData, err := client.CreateSpeech(context.Background(), req) + if err != nil { + logger.Error("failed to create speech: ", err) + resp.ERROR(c, "failed to create speech") + return + } + + // 设置响应头 + c.Header("Content-Type", "audio/mpeg") + c.Header("Content-Disposition", "attachment; filename=speech.mp3") + + // 将音频数据写入响应 + _, err = io.Copy(c.Writer, audioData) + if err != nil { + logger.Error("failed to write audio data: ", err) + resp.ERROR(c, "failed to write audio data") + return } - return text } diff --git a/api/main.go b/api/main.go index 60caf25c..0f67adb6 100644 --- a/api/main.go +++ b/api/main.go @@ -251,6 +251,7 @@ func main() { group.GET("clear", h.Clear) group.POST("tokens", h.Tokens) group.GET("stop", 
// Request speech audio for `text` from the backend TTS endpoint and play it.
// The temporary object URL is released once playback ends or fails, so the
// audio blob is not kept alive for the lifetime of the page.
const synthesis = (text) => {
  httpPost("/api/chat/tts", { text }, { responseType: "blob" })
    .then((response) => {
      // The backend responds with Content-Type audio/mpeg (MP3), so label the
      // Blob accordingly.
      // NOTE(review): assumes httpPost resolves with the raw body rather than
      // an axios response object; confirm against utils/http.
      const blob = new Blob([response], { type: "audio/mpeg" });
      const audioUrl = URL.createObjectURL(blob);
      const audio = new Audio(audioUrl);

      // play() resolves when playback STARTS, so the URL is released from the
      // "ended" and "error" events rather than from the play() promise.
      const release = () => URL.revokeObjectURL(audioUrl);
      audio.addEventListener("ended", release, { once: true });
      audio.addEventListener("error", release, { once: true });

      audio.play().catch((err) => {
        release();
        console.error('播放音频失败:', err);
        ElMessage.error('音频播放失败,请检查浏览器是否支持该音频格式');
      });
    })
    .catch((err) => {
      console.error('语音合成请求失败:', err);
      ElMessage.error('语音合成失败,请稍后重试');
    });
};