语音播报

This commit is contained in:
RockYang 2025-03-31 18:12:12 +08:00
parent 14fa4fdaa0
commit afb9193985
6 changed files with 75 additions and 25 deletions

View File

@ -50,6 +50,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/pilu/config v0.0.0-20131214182432-3eb99e6c0b9a // indirect
github.com/pilu/fresh v0.0.0-20240621171608-8d1fef547a99 // indirect
github.com/sashabaranov/go-openai v1.38.1 // indirect
github.com/tklauser/go-sysconf v0.3.13 // indirect
github.com/tklauser/numcpus v0.7.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect

View File

@ -203,6 +203,8 @@ github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUA
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/sashabaranov/go-openai v1.38.1 h1:TtZabbFQZa1nEni/IhVtDF/WQjVqDgd+cWR5OeddzF8=
github.com/sashabaranov/go-openai v1.38.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=

View File

@ -22,15 +22,16 @@ import (
"geekai/utils"
"geekai/utils/resp"
"html/template"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"unicode/utf8"
"github.com/gin-gonic/gin"
"github.com/go-redis/redis/v8"
"github.com/sashabaranov/go-openai"
"gorm.io/gorm"
)
@ -501,28 +502,50 @@ func (h *ChatHandler) saveChatHistory(
}
}
// 将AI回复消息中生成的图片链接下载到本地
func (h *ChatHandler) extractImgUrl(text string) string {
pattern := `!\[([^\]]*)]\(([^)]+)\)`
re := regexp.MustCompile(pattern)
matches := re.FindAllStringSubmatch(text, -1)
// 下载图片并替换链接地址
for _, match := range matches {
imageURL := match[2]
logger.Debug(imageURL)
// 对于相同地址的图片,已经被替换了,就不再重复下载了
if !strings.Contains(text, imageURL) {
continue
}
newImgURL, err := h.uploadManager.GetUploadHandler().PutUrlFile(imageURL, false)
if err != nil {
logger.Error("error with download image: ", err)
continue
}
text = strings.ReplaceAll(text, imageURL, newImgURL)
// 文本生成语音
func (h *ChatHandler) TextToSpeech(c *gin.Context) {
var data struct {
Text string `json:"text"`
}
if err := c.ShouldBindJSON(&data); err != nil {
resp.ERROR(c, types.InvalidArgs)
return
}
// 查询一个已启用的 chat 类型 API KEY,供下方调用 OpenAI TTS 接口使用
var apiKey model.ApiKey
h.DB.Where("type", "chat").Where("enabled", true).First(&apiKey)
if apiKey.Id == 0 {
resp.ERROR(c, "no available key, please import key")
return
}
// 调用 openai tts api
config := openai.DefaultConfig(apiKey.Value)
config.BaseURL = apiKey.ApiURL
client := openai.NewClientWithConfig(config)
req := openai.CreateSpeechRequest{
Model: openai.TTSModel1,
Input: data.Text,
Voice: openai.VoiceAlloy,
}
audioData, err := client.CreateSpeech(context.Background(), req)
if err != nil {
logger.Error("failed to create speech: ", err)
resp.ERROR(c, "failed to create speech")
return
}
// 设置响应头
c.Header("Content-Type", "audio/mpeg")
c.Header("Content-Disposition", "attachment; filename=speech.mp3")
// 将音频数据写入响应
_, err = io.Copy(c.Writer, audioData)
if err != nil {
logger.Error("failed to write audio data: ", err)
resp.ERROR(c, "failed to write audio data")
return
}
return text
}

View File

@ -251,6 +251,7 @@ func main() {
group.GET("clear", h.Clear)
group.POST("tokens", h.Tokens)
group.GET("stop", h.StopGenerate)
group.POST("tts", h.TextToSpeech)
}),
fx.Invoke(func(s *core.AppServer, h *handler.NetHandler) {
s.Engine.POST("/api/upload", h.Upload)

BIN
web/public/images/voice.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@ -82,6 +82,7 @@
<i class="iconfont icon-speaker"></i>
</el-tooltip>
</span>
<img src="/images/voice.gif" />
</span>
</div>
</div>
@ -97,6 +98,8 @@ import hl from "highlight.js";
import emoji from "markdown-it-emoji";
import mathjaxPlugin from "markdown-it-mathjax3";
import MarkdownIt from "markdown-it";
import { httpPost } from "@/utils/http";
import RippleButton from "./ui/RippleButton.vue";
// eslint-disable-next-line no-undef,no-unused-vars
const props = defineProps({
@ -156,7 +159,26 @@ if (!props.data.icon) {
// Request speech audio for `text` from the backend TTS endpoint and play it
// in the browser. Failures of the request or of playback are reported to the
// user through ElMessage and logged to the console.
const synthesis = (text) => {
  console.log(text);
  // Ask for the binary body as a Blob instead of parsed JSON.
  httpPost("/api/chat/tts", { text }, { responseType: 'blob' }).then(response => {
    // The endpoint responds with Content-Type audio/mpeg (MP3), so the Blob
    // must carry that MIME type; labelling it audio/wav can make the browser
    // pick the wrong decoder.
    const blob = new Blob([response], { type: 'audio/mpeg' });
    const audioUrl = URL.createObjectURL(blob);
    const audio = new Audio(audioUrl);

    // play() resolves when playback STARTS, not when it ends. Keep the object
    // URL alive until the clip has finished (or failed) and release it then.
    const release = () => URL.revokeObjectURL(audioUrl);
    audio.addEventListener('ended', release, { once: true });
    audio.addEventListener('error', release, { once: true });

    audio.play().catch(err => {
      // Playback never started, so 'ended' will not fire; release here.
      release();
      console.error('播放音频失败:', err);
      ElMessage.error('音频播放失败,请检查浏览器是否支持该音频格式');
    });
  }).catch(err => {
    console.error('语音合成请求失败:', err);
    ElMessage.error('语音合成失败,请稍后重试');
  });
};
//
@ -168,6 +190,7 @@ const reGenerate = (prompt) => {
<style lang="stylus">
@import '@/assets/css/markdown/vue.css';
.chat-page,.chat-export {
--font-family: Menlo,"微软雅黑","Roboto Mono","Courier New",Courier,monospace,"Inter",sans-serif;
font-family: var(--font-family);