milvus/internal/storage/binlog_writer.go
XuanYang-cn 37a447d166
feat: Add CMEK cipher plugin (#43722)
1. Enable Milvus to read cipher configs
2. Enable cipher plugin in binlog reader and writer
3. Add a testCipher for unittests
4. Support pooling for datanode
5. Add encryption in storagev2

See also: #40321 
Signed-off-by: yangxuan <xuan.yang@zilliz.com>

---------

Signed-off-by: yangxuan <xuan.yang@zilliz.com>
2025-08-27 11:15:52 +08:00

314 lines
8.3 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"bytes"
"encoding/binary"
"github.com/cockroachdb/errors"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/hook"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/log"
)
// BinlogType distinguishes the kinds of binlog files; each kind stores a
// different category of data.
type BinlogType int32
const (
// InsertBinlog BinlogType for insert data
InsertBinlog BinlogType = iota
// DeleteBinlog BinlogType for delete data
DeleteBinlog
// DDLBinlog BinlogType for DDL
DDLBinlog
// IndexFileBinlog BinlogType for index
IndexFileBinlog
// StatsBinlog BinlogType for stats data
StatsBinlog
// BM25Binlog BinlogType for bm25 stats data
BM25Binlog
)
// MagicNumber is the 32-bit marker used in binlog; Finish writes it as the
// first value of the serialized buffer.
const MagicNumber int32 = 0xfffabc
// String returns the human-readable name of the binlog type. Values without
// a dedicated name yield the generic "BinlogType".
func (b BinlogType) String() string {
	name := "BinlogType"
	switch b {
	case InsertBinlog:
		name = "InsertBinlog"
	case DeleteBinlog:
		name = "DeleteBinlog"
	case DDLBinlog:
		name = "DDLBinlog"
	case IndexFileBinlog:
		name = "IndexFileBinlog"
	case StatsBinlog:
		name = "StatsBinlog"
	case BM25Binlog:
		name = "BM25"
	}
	return name
}
// baseBinlogWriter holds the state shared by the concrete binlog writers:
// the descriptor event, the registered event writers, and the serialized
// output produced by Finish.
type baseBinlogWriter struct {
// Embedded descriptor event; its fields and methods are promoted onto the writer.
*descriptorEvent
// Set to MagicNumber by the constructors in this file.
magicNumber int32
// Kind of binlog this writer produces; returned by GetBinlogType.
binlogType BinlogType
// Event writers in registration order; Finish serializes them in this order.
eventWriters []EventWriter
// Serialized binlog. nil until Finish runs; isClosed keys off this field.
buffer *bytes.Buffer
// Total row count across all event writers, computed by Finish.
length int32
// Optional. When non-nil, Finish encrypts the serialized events with it.
encryptor hook.Encryptor
}
// isClosed reports whether the output buffer exists. Finish is what allocates
// the buffer, so a true result means Finish has been called; Close does not
// affect it.
func (writer *baseBinlogWriter) isClosed() bool {
return writer.buffer != nil
}
// GetEventNums returns the number of event writers currently registered with
// this binlog writer, converted to int32.
func (writer *baseBinlogWriter) GetEventNums() int32 {
return int32(len(writer.eventWriters))
}
// GetRowNums reports how many rows the writer holds. Once the binlog has been
// finished, the count recorded by Finish is returned. Before that, the payload
// lengths of all event writers are summed on demand; the first error from an
// event writer aborts the sum and is returned with a zero count.
func (writer *baseBinlogWriter) GetRowNums() (int32, error) {
	if writer.isClosed() {
		return writer.length, nil
	}

	var total int
	events := writer.eventWriters
	for i := range events {
		n, err := events[i].GetPayloadLengthFromWriter()
		if err != nil {
			return 0, err
		}
		total += n
	}
	return int32(total), nil
}
// GetBinlogType returns the value of the writer's binlogType field.
func (writer *baseBinlogWriter) GetBinlogType() BinlogType {
return writer.binlogType
}
// GetBuffer returns the serialized binlog bytes. If the binlog has not been
// finished yet (no buffer exists), it returns a nil slice together with an
// error. The returned slice comes straight from the buffer's Bytes method and
// is not copied here.
func (writer *baseBinlogWriter) GetBuffer() ([]byte, error) {
	if buf := writer.buffer; buf != nil {
		return buf.Bytes(), nil
	}
	return nil, errors.New("please close binlog before get buffer")
}
// Finish serializes the binlog into writer.buffer. It returns nil immediately
// when the buffer already exists, and fails with "invalid start/end timestamp"
// when StartTimestamp or EndTimestamp is still zero.
//
// The buffer receives MagicNumber, then the descriptor event, then the output
// of every event writer in registration order. When an encryptor is set, the
// events are first collected in a separate plain-text buffer, encrypted as a
// single blob, and the ciphertext is appended after the descriptor event; the
// magic number and descriptor event themselves are written unencrypted.
//
// As a side effect, writer.length is reset and recomputed as the total number
// of rows reported by the event writers.
//
// NOTE(review): writer.buffer is assigned before any fallible step, so after
// a failed Finish the writer reports isClosed() == true, a second Finish
// returns nil, and GetBuffer exposes the partially written bytes. Confirm
// that callers discard the writer when Finish returns an error.
func (writer *baseBinlogWriter) Finish() error {
if writer.buffer != nil {
return nil
}
if writer.StartTimestamp == 0 || writer.EndTimestamp == 0 {
return errors.New("invalid start/end timestamp")
}
// offset tracks the byte position handed to the next event writer.
var offset int32
writer.buffer = new(bytes.Buffer)
if err := binary.Write(writer.buffer, common.Endian, MagicNumber); err != nil {
return err
}
offset += int32(binary.Size(MagicNumber))
if err := writer.descriptorEvent.Write(writer.buffer); err != nil {
return err
}
offset += writer.descriptorEvent.GetMemoryUsageInBytes()
// Events go straight into the output buffer unless they must be encrypted,
// in which case they are staged in a scratch buffer first.
eventBuffer := writer.buffer
if writer.encryptor != nil {
eventBuffer = new(bytes.Buffer)
}
writer.length = 0
for _, w := range writer.eventWriters {
// The offset must be set before the event is finished and written.
w.SetOffset(offset)
if err := w.Finish(); err != nil {
return err
}
if err := w.Write(eventBuffer); err != nil {
return err
}
length, err := w.GetMemoryUsageInBytes()
if err != nil {
return err
}
offset += length
rows, err := w.GetPayloadLengthFromWriter()
if err != nil {
return err
}
writer.length += int32(rows)
}
if writer.encryptor != nil {
// Encrypt all staged events as one blob and append the ciphertext.
encrypted, err := writer.encryptor.Encrypt(eventBuffer.Bytes())
if err != nil {
return err
}
log.Debug("Binlog writer encrypted plain text",
zap.String("writer type", writer.binlogType.String()),
zap.Int("plain size", eventBuffer.Len()),
zap.Int("cipher size", len(encrypted)))
if err := binary.Write(writer.buffer, common.Endian, encrypted); err != nil {
return err
}
}
return nil
}
// Close calls Close on every event writer registered with this binlog writer.
// It does not touch the serialized buffer.
func (writer *baseBinlogWriter) Close() {
	events := writer.eventWriters
	for i := range events {
		events[i].Close()
	}
}
// InsertBinlogWriter is an object to write binlog file which saves insert data.
// It embeds baseBinlogWriter and adds NextInsertEventWriter for creating
// insert event writers.
type InsertBinlogWriter struct {
baseBinlogWriter
}
// NextInsertEventWriter creates an insert event writer for the binlog's
// payload data type, registers it with this binlog writer, and returns it.
// It fails with "binlog has closed" once the binlog has been finished, and
// passes through any error from constructing the event writer.
func (writer *InsertBinlogWriter) NextInsertEventWriter(opts ...PayloadWriterOptions) (*insertEventWriter, error) {
	if writer.isClosed() {
		return nil, errors.New("binlog has closed")
	}

	ew, err := newInsertEventWriter(writer.PayloadDataType, opts...)
	if err != nil {
		return nil, err
	}

	writer.eventWriters = append(writer.eventWriters, ew)
	return ew, nil
}
// DeleteBinlogWriter is an object to write binlog file which saves delete data.
// It embeds baseBinlogWriter and adds NextDeleteEventWriter for creating
// delete event writers.
type DeleteBinlogWriter struct {
baseBinlogWriter
}
// NextDeleteEventWriter creates a delete event writer for the binlog's
// payload data type, registers it with this binlog writer, and returns it.
// It fails with "binlog has closed" once the binlog has been finished, and
// passes through any error from constructing the event writer.
func (writer *DeleteBinlogWriter) NextDeleteEventWriter(opts ...PayloadWriterOptions) (*deleteEventWriter, error) {
	if writer.isClosed() {
		return nil, errors.New("binlog has closed")
	}

	ew, err := newDeleteEventWriter(writer.PayloadDataType, opts...)
	if err != nil {
		return nil, err
	}

	writer.eventWriters = append(writer.eventWriters, ew)
	return ew, nil
}
// IndexFileBinlogWriter is an object to write binlog file which saves index files.
// It embeds baseBinlogWriter and adds NextIndexFileEventWriter for creating
// index file event writers.
type IndexFileBinlogWriter struct {
baseBinlogWriter
}
// NextIndexFileEventWriter creates an index file event writer for the
// binlog's payload data type, registers it with this binlog writer, and
// returns it. It fails with "binlog has closed" once the binlog has been
// finished, and passes through any error from constructing the event writer.
func (writer *IndexFileBinlogWriter) NextIndexFileEventWriter() (*indexFileEventWriter, error) {
	if writer.isClosed() {
		return nil, errors.New("binlog has closed")
	}

	ew, err := newIndexFileEventWriter(writer.PayloadDataType)
	if err != nil {
		return nil, err
	}

	writer.eventWriters = append(writer.eventWriters, ew)
	return ew, nil
}
// NewInsertBinlogWriter creates an InsertBinlogWriter to write a binlog file.
//
// The descriptor event is populated with the payload data type and the
// collection, partition, segment and field IDs; nullable is recorded in the
// descriptor's extras under nullableKey. Each option in opts is then applied,
// in order, to the base writer before it is wrapped in the returned
// InsertBinlogWriter. The writer starts with no event writers and no buffer.
func NewInsertBinlogWriter(
	dataType schemapb.DataType, collectionID, partitionID, segmentID, FieldID int64, nullable bool,
	opts ...BinlogWriterOptions,
) *InsertBinlogWriter {
	desc := newDescriptorEvent()
	desc.PayloadDataType = dataType
	desc.CollectionID = collectionID
	desc.PartitionID = partitionID
	desc.SegmentID = segmentID
	desc.FieldID = FieldID
	// nullable is kept in the extras rather than a dedicated field, for
	// compatibility.
	desc.AddExtra(nullableKey, nullable)

	base := baseBinlogWriter{
		descriptorEvent: desc,
		magicNumber:     MagicNumber,
		binlogType:      InsertBinlog,
		eventWriters:    []EventWriter{},
	}
	for _, apply := range opts {
		apply(&base)
	}

	return &InsertBinlogWriter{baseBinlogWriter: base}
}
// NewDeleteBinlogWriter creates a DeleteBinlogWriter to write a binlog file.
//
// The descriptor event is populated with the payload data type and the
// collection, partition and segment IDs. Each option in opts is then applied,
// in order, to the base writer before it is wrapped in the returned
// DeleteBinlogWriter. The writer starts with no event writers and no buffer.
func NewDeleteBinlogWriter(
	dataType schemapb.DataType, collectionID, partitionID, segmentID int64,
	opts ...BinlogWriterOptions,
) *DeleteBinlogWriter {
	descriptorEvent := newDescriptorEvent()
	descriptorEvent.PayloadDataType = dataType
	descriptorEvent.CollectionID = collectionID
	descriptorEvent.PartitionID = partitionID
	descriptorEvent.SegmentID = segmentID

	baseWriter := baseBinlogWriter{
		descriptorEvent: descriptorEvent,
		magicNumber:     MagicNumber,
		// A delete writer must report DeleteBinlog: GetBinlogType and the
		// Finish debug log both read this field.
		binlogType:   DeleteBinlog,
		eventWriters: make([]EventWriter, 0),
		buffer:       nil,
	}
	for _, opt := range opts {
		opt(&baseWriter)
	}

	w := &DeleteBinlogWriter{
		baseBinlogWriter: baseWriter,
	}
	return w
}
// BinlogWriterOptions is a functional option that mutates a baseBinlogWriter.
// The NewInsertBinlogWriter and NewDeleteBinlogWriter constructors apply the
// options they are given, in order, before returning the writer.
type BinlogWriterOptions func(base *baseBinlogWriter)
// WithWriterEncryptionContext returns an option that turns on encryption for
// a binlog writer. The option stores edek (converted to a string) under
// edekKey and ezID under ezIDKey via AddExtra, and installs encryptor on the
// writer so that Finish encrypts the serialized events with it.
func WithWriterEncryptionContext(ezID int64, edek []byte, encryptor hook.Encryptor) BinlogWriterOptions {
	return func(w *baseBinlogWriter) {
		w.AddExtra(edekKey, string(edek))
		w.AddExtra(ezIDKey, ezID)
		w.encryptor = encryptor
	}
}