chyezh fda720b880
enhance: streaming service grpc utilities (#34436)
issue: #33285

- add two grpc resolver (by session and by streaming coord assignment
service)
- add one grpc balancer (by serverID and roundrobin)
- add lazy conn to avoid block by first service discovery
- add some utility function for streaming service

Signed-off-by: chyezh <chyezh@outlook.com>
2024-07-15 20:49:38 +08:00

233 lines
7.6 KiB
Go

package producer
import (
"io"
"strconv"
"sync"
"time"
"github.com/cockroachdb/errors"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/proto/streamingpb"
"github.com/milvus-io/milvus/internal/streamingnode/server/wal"
"github.com/milvus-io/milvus/internal/streamingnode/server/walmanager"
"github.com/milvus-io/milvus/internal/util/streamingutil/service/contextutil"
"github.com/milvus-io/milvus/internal/util/streamingutil/status"
"github.com/milvus-io/milvus/internal/util/streamingutil/typeconverter"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/streaming/util/message"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
// CreateProduceServer builds a ProduceServer on top of an incoming produce stream.
//
// The create-producer request is read from the stream context, the WAL for the
// requested pchannel is fetched from walManager, and a "created" response
// carrying the WAL name is sent to the client before the server is returned.
//
// Expected message sequence on the stream:
//
//	CreateProducer (Header)
//	ProduceRequest 1 -> ProduceResponse Or Error 1
//	ProduceRequest 2 -> ProduceResponse Or Error 2
//	ProduceRequest 3 -> ProduceResponse Or Error 3
//	CloseProducer
//
// An error is returned when the create request cannot be read from the context,
// when no WAL is available for the channel, or when the created response
// cannot be sent.
func CreateProduceServer(walManager walmanager.Manager, streamServer streamingpb.StreamingNodeHandlerService_ProduceServer) (*ProduceServer, error) {
	createReq, err := contextutil.GetCreateProducer(streamServer.Context())
	if err != nil {
		return nil, status.NewInvaildArgument("create producer request is required")
	}

	pchannel := typeconverter.NewPChannelInfoFromProto(createReq.Pchannel)
	l, err := walManager.GetAvailableWAL(pchannel)
	if err != nil {
		return nil, err
	}

	helper := &produceGrpcServerHelper{StreamingNodeHandlerService_ProduceServer: streamServer}
	if sendErr := helper.SendCreated(l.WALName()); sendErr != nil {
		return nil, errors.Wrap(sendErr, "at send created")
	}

	channel := l.Channel()
	server := &ProduceServer{
		wal:           l,
		produceServer: helper,
		logger: log.With(
			zap.String("channel", channel.Name),
			zap.Int64("term", channel.Term),
		),
		// Unbuffered: every response is handed directly to the send loop.
		produceMessageCh: make(chan *streamingpb.ProduceMessageResponse),
		// appendWG is left at its zero value, which is ready to use.
	}
	return server, nil
}
// ProduceServer serves one produce stream: it receives produce requests from
// the client, appends the messages to a WAL and streams the results back.
type ProduceServer struct {
// wal is the write-ahead log that messages are appended to.
wal wal.WAL
// produceServer wraps the gRPC stream used to receive requests and send responses.
produceServer *produceGrpcServerHelper
// logger carries the channel name and term of the wal as fields.
logger *log.MLogger
produceMessageCh chan *streamingpb.ProduceMessageResponse // All processing message results should be sent through this channel.
// appendWG counts in-flight async appends; the recv loop waits on it before
// closing produceMessageCh.
appendWG sync.WaitGroup
}
// Execute runs the produce server until the stream is finished.
//
// It spawns the recv arm in a background goroutine and runs the send arm on the
// calling goroutine; the value returned is the result of the send arm.
func (p *ProduceServer) Execute() error {
	// Recv arm: handles requests coming from the client. It stays blocked until
	// the stream is closed, either
	//  1. by the client, or
	//  2. by cancellation of the server context once the caller of Execute returns.
	// Its result is intentionally discarded; recvLoop logs its own outcome.
	go func() {
		_ = p.recvLoop()
	}()

	// Send arm: stays blocked until
	//  1. the stream is broken, or
	//  2. the recv arm has finished and every response has been sent.
	return p.sendLoop()
}
// sendLoop forwards produce results from produceMessageCh to the client.
//
// It returns nil once produceMessageCh has been closed and the close response
// has been delivered. It returns an error when sending a produce response or
// the close response fails, or when the stream context is done.
// The outcome is logged when the loop exits.
func (p *ProduceServer) sendLoop() (err error) {
	defer func() {
		if err != nil {
			p.logger.Warn("send arm of stream closed by unexpected error", zap.Error(err))
			return
		}
		p.logger.Info("send arm of stream closed")
	}()
	for {
		select {
		case resp, ok := <-p.produceMessageCh:
			if !ok {
				// The channel is closed by the recv arm after all pending appends
				// finished, so every response has been sent; finish with the close
				// response. A failure here must not be reported as a clean shutdown.
				if sendErr := p.produceServer.SendClosed(); sendErr != nil {
					return errors.Wrap(sendErr, "at send closed")
				}
				return nil
			}
			if sendErr := p.produceServer.SendProduceMessage(resp); sendErr != nil {
				return sendErr
			}
		case <-p.produceServer.Context().Done():
			return errors.Wrap(p.produceServer.Context().Err(), "cancel send loop by stream server")
		}
	}
}
// recvLoop reads requests from the client until the stream ends.
//
// It returns nil when the stream ends with io.EOF and the receive error
// otherwise. On exit it waits for all in-flight appends and then closes
// produceMessageCh, which lets the send loop finish.
func (p *ProduceServer) recvLoop() (err error) {
	defer func() {
		// Wait first: append callbacks still write to produceMessageCh, so the
		// channel must not be closed before they have all completed.
		p.appendWG.Wait()
		close(p.produceMessageCh)

		if err == nil {
			p.logger.Info("recv arm of stream closed")
			return
		}
		p.logger.Warn("recv arm of stream closed by unexpected error", zap.Error(err))
	}()

	for {
		req, recvErr := p.produceServer.Recv()
		switch {
		case recvErr == io.EOF:
			return nil
		case recvErr != nil:
			return recvErr
		}

		switch body := req.Request.(type) {
		case *streamingpb.ProduceRequest_Produce:
			p.handleProduce(body.Produce)
		case *streamingpb.ProduceRequest_Close:
			// Nothing else to do here: io.EOF is expected on a following Recv.
			p.logger.Info("recv arm of stream start to close, waiting for all append request finished...")
		default:
			// Unknown requests are skipped to stay forward compatible.
			p.logger.Warn("unknown request type", zap.Any("request", body))
		}
	}
}
// handleProduce processes a single produce request.
//
// The request is converted into a mutable message and validated. A validation
// failure is reported to the client right away; otherwise the message is
// appended to the WAL asynchronously and the result is reported from the
// append callback.
func (p *ProduceServer) handleProduce(req *streamingpb.ProduceMessageRequest) {
	requestID := req.RequestId
	p.logger.Debug("recv produce message from client", zap.Int64("requestID", requestID))

	raw := req.GetMessage()
	msg := message.NewMutableMessageBuilder().
		WithPayload(raw.GetPayload()).
		WithProperties(raw.GetProperties()).
		BuildMutable()

	if validateErr := p.validateMessage(msg); validateErr != nil {
		p.logger.Warn("produce message validation failed", zap.Int64("requestID", requestID), zap.Error(validateErr))
		p.sendProduceResult(requestID, nil, validateErr)
		return
	}

	// Appends are issued asynchronously, so several requests may be in flight
	// at the same time; appendWG tracks them for the recv loop.
	size := msg.EstimateSize()
	start := time.Now()
	onAppended := func(id message.MessageID, appendErr error) {
		defer func() {
			p.appendWG.Done()
			p.updateMetrics(size, time.Since(start).Seconds(), appendErr)
		}()
		p.sendProduceResult(requestID, id, appendErr)
	}

	p.appendWG.Add(1)
	p.wal.AppendAsync(p.produceServer.Context(), msg, onAppended)
}
// validateMessage checks that msg can be appended.
//
// It rejects, in this order, a version that is not greater than
// message.VersionOld, an invalid message type, and a nil payload.
// It returns nil when all checks pass.
func (p *ProduceServer) validateMessage(msg message.MutableMessage) error {
	switch {
	case !msg.Version().GT(message.VersionOld):
		return status.NewInner("unsupported message version")
	case !msg.MessageType().Valid():
		return status.NewInner("unsupported message type")
	case msg.Payload() == nil:
		return status.NewInner("empty payload for message")
	}
	return nil
}
// sendProduceResult builds the response for request reqID and hands it to the
// send loop.
//
// When err is nil the response carries the marshaled message id; otherwise it
// carries err converted to a streaming error. If the stream context is done
// before the response is accepted, the response is dropped.
func (p *ProduceServer) sendProduceResult(reqID int64, id message.MessageID, err error) {
	resp := &streamingpb.ProduceMessageResponse{RequestId: reqID}

	if err == nil {
		msgID := &streamingpb.MessageID{Id: id.Marshal()}
		resp.Response = &streamingpb.ProduceMessageResponse_Result{
			Result: &streamingpb.ProduceMessageResponseResult{Id: msgID},
		}
	} else {
		p.logger.Warn("append message to wal failed", zap.Int64("requestID", reqID), zap.Error(err))
		pbErr := status.AsStreamingError(err).AsPBError()
		resp.Response = &streamingpb.ProduceMessageResponse_Error{Error: pbErr}
	}

	// A done server context means the stream has been closed: pending responses
	// are dropped and the client side is expected to handle it.
	select {
	case <-p.produceServer.Context().Done():
		p.logger.Warn("stream closed before produce message response sent", zap.Int64("requestID", reqID), zap.Any("messageID", id))
	case p.produceMessageCh <- resp:
		p.logger.Debug("send produce message response to client", zap.Int64("requestID", reqID), zap.Any("messageID", id), zap.Error(err))
	}
}
// updateMetrics records the size and duration of one produce operation.
//
// Both observations are labeled with the node id, the channel name and term of
// the wal, and the status label derived from err.
func (p *ProduceServer) updateMetrics(messageSize int, cost float64, err error) {
	channel := p.wal.Channel()
	labels := []string{
		paramtable.GetStringNodeID(),
		channel.Name,
		strconv.FormatInt(channel.Term, 10),
		getStatusLabel(err),
	}
	metrics.StreamingNodeProduceBytes.WithLabelValues(labels...).Observe(float64(messageSize))
	metrics.StreamingNodeProduceDurationSeconds.WithLabelValues(labels...).Observe(cost)
}
// getStatusLabel maps err to a metrics status label: the cancel label when
// status.IsCanceled reports true, the fail label for any other non-nil error,
// and the success label otherwise.
func getStatusLabel(err error) string {
	switch {
	case status.IsCanceled(err):
		return metrics.CancelLabel
	case err != nil:
		return metrics.FailLabel
	default:
		return metrics.SuccessLabel
	}
}