wei liu 5038036ece
enhance: Reuse hash locations when accessing bloom filter (#32642)
issue: #32530 

When matching a pk against segment bloom filters, the hash locations can be
reused. This PR tracks the maximum number of hash functions and computes the
hash locations once for all segments; reusing them speeds up bloom filter access.

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
2024-05-07 06:13:47 -07:00

226 lines
6.3 KiB
Go

package segments
import (
"context"
"fmt"
"testing"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
storage "github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/initcore"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
// SegmentSuite is the testify suite for segment behaviour. SetupTest populates
// every field below before each test, and TearDownTest releases the segments,
// deletes the collection and removes the files under rootPath afterwards.
type SegmentSuite struct {
	suite.Suite

	// Storage fixture: rootPath is the prefix handed to the chunk manager
	// factory and removed again on teardown.
	rootPath     string
	chunkManager storage.ChunkManager

	// Data
	manager *Manager

	// Identifiers assigned in SetupTest.
	collectionID, partitionID, segmentID int64

	// collection is fetched from manager; sealed and growing are the two
	// segments created on top of it.
	collection *Collection
	sealed     Segment
	growing    Segment
}
// SetupSuite initializes the global paramtable. It is the testify suite-level
// setup hook, so it runs before the suite's tests rather than before each test.
func (suite *SegmentSuite) SetupSuite() {
paramtable.Init()
}
// SetupTest builds a fresh fixture before every test:
//
//  1. a chunk manager rooted at a path named after the running test,
//  2. a Manager holding one collection (ID 100) with a single partition (ID 10),
//  3. a sealed segment (ID 1) whose field data is loaded from binlogs written
//     through the chunk manager,
//  4. a growing segment (ID 2) filled with one insert message of msgLength rows.
//
// Both segments are registered in the segment manager at the end. Any failure
// aborts the test immediately via Require.
func (suite *SegmentSuite) SetupTest() {
	var err error
	ctx := context.Background()
	msgLength := 100

	suite.rootPath = suite.T().Name()
	chunkManagerFactory := storage.NewTestChunkManagerFactory(paramtable.Get(), suite.rootPath)
	// Fail fast when the chunk manager cannot be created; previously the error
	// was discarded, so a failure here only surfaced later when the unset
	// chunk manager was used.
	suite.chunkManager, err = chunkManagerFactory.NewPersistentStorageChunkManager(ctx)
	suite.Require().NoError(err)
	initcore.InitRemoteChunkManager(paramtable.Get())

	suite.collectionID = 100
	suite.partitionID = 10
	suite.segmentID = 1

	suite.manager = NewManager()
	schema := GenTestCollectionSchema("test-reduce", schemapb.DataType_Int64, true)
	indexMeta := GenTestIndexMeta(suite.collectionID, schema)
	suite.manager.Collection.PutOrRef(suite.collectionID,
		schema,
		indexMeta,
		&querypb.LoadMetaInfo{
			LoadType:     querypb.LoadType_LoadCollection,
			CollectionID: suite.collectionID,
			PartitionIDs: []int64{suite.partitionID},
		},
	)
	suite.collection = suite.manager.Collection.Get(suite.collectionID)

	// Sealed segment: created with load info that declares msgLength rows and
	// one binlog entry for field 101.
	suite.sealed, err = NewSegment(ctx,
		suite.collection,
		SegmentTypeSealed,
		0,
		&querypb.SegmentLoadInfo{
			CollectionID:  suite.collectionID,
			SegmentID:     suite.segmentID,
			PartitionID:   suite.partitionID,
			InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
			Level:         datapb.SegmentLevel_Legacy,
			NumOfRows:     int64(msgLength),
			BinlogPaths: []*datapb.FieldBinlog{
				{
					FieldID: 101,
					Binlogs: []*datapb.Binlog{
						{
							LogSize: 10086,
						},
					},
				},
			},
		},
	)
	suite.Require().NoError(err)

	// Write binlogs through the chunk manager, then load each field into the
	// sealed segment.
	binlogs, _, err := SaveBinLog(ctx,
		suite.collectionID,
		suite.partitionID,
		suite.segmentID,
		msgLength,
		schema,
		suite.chunkManager,
	)
	suite.Require().NoError(err)

	g, err := suite.sealed.(*LocalSegment).StartLoadData()
	suite.Require().NoError(err)
	for _, binlog := range binlogs {
		err = suite.sealed.(*LocalSegment).LoadFieldData(ctx, binlog.FieldID, int64(msgLength), binlog)
		suite.Require().NoError(err)
	}
	// Closes the load started by StartLoadData; nil presumably reports
	// success — confirm against the guard's Done contract.
	g.Done(nil)

	// Growing segment: same collection/partition, next segment ID, populated
	// through Insert instead of binlogs.
	suite.growing, err = NewSegment(ctx,
		suite.collection,
		SegmentTypeGrowing,
		0,
		&querypb.SegmentLoadInfo{
			SegmentID:     suite.segmentID + 1,
			CollectionID:  suite.collectionID,
			PartitionID:   suite.partitionID,
			InsertChannel: fmt.Sprintf("by-dev-rootcoord-dml_0_%dv0", suite.collectionID),
			Level:         datapb.SegmentLevel_Legacy,
		},
	)
	suite.Require().NoError(err)

	insertMsg, err := genInsertMsg(suite.collection, suite.partitionID, suite.growing.ID(), msgLength)
	suite.Require().NoError(err)
	insertRecord, err := storage.TransferInsertMsgToInsertRecord(suite.collection.Schema(), insertMsg)
	suite.Require().NoError(err)
	err = suite.growing.Insert(ctx, insertMsg.RowIDs, insertMsg.Timestamps, insertRecord)
	suite.Require().NoError(err)

	suite.manager.Segment.Put(ctx, SegmentTypeSealed, suite.sealed)
	suite.manager.Segment.Put(ctx, SegmentTypeGrowing, suite.growing)
}
// TearDownTest undoes SetupTest: it releases the sealed and growing segments,
// deletes the collection, and removes everything stored under rootPath.
func (suite *SegmentSuite) TearDownTest() {
	ctx := context.Background()

	// Sealed first, then growing.
	for _, seg := range []Segment{suite.sealed, suite.growing} {
		seg.Release(ctx)
	}

	DeleteCollection(suite.collection)
	suite.chunkManager.RemoveWithPrefix(ctx, suite.rootPath)
}
// TestLoadInfo checks that LoadInfo is non-nil for both fixtures. SetupTest
// passes a SegmentLoadInfo to NewSegment for the sealed and the growing
// segment alike, so neither is expected to report nil.
func (suite *SegmentSuite) TestLoadInfo() {
	// Sealed first, then growing.
	for _, seg := range []Segment{suite.sealed, suite.growing} {
		suite.NotNil(seg.LoadInfo())
	}
}
// TestResourceUsageEstimate pins the estimates reported by the two fixtures:
// the growing segment reports zero memory and disk, while the sealed segment
// reports non-zero memory with zero disk and zero mmap fields.
func (suite *SegmentSuite) TestResourceUsageEstimate() {
	// Growing segment: the estimate is expected to be empty.
	growingUsage := suite.growing.ResourceUsageEstimate()
	suite.Zero(growingUsage.MemorySize)
	suite.Zero(growingUsage.DiskSize)

	// Sealed segment: only the memory estimate is expected to be non-zero.
	// NOTE(review): presumably derived from the binlog LogSize declared in
	// SetupTest — confirm.
	sealedUsage := suite.sealed.ResourceUsageEstimate()
	suite.NotZero(sealedUsage.MemorySize)
	suite.Zero(sealedUsage.DiskSize)
	suite.Zero(sealedUsage.MmapFieldCount)
}
// TestDelete deletes the same primary keys from the sealed and then the
// growing segment and verifies, for each, that RowNum drops by the number of
// deleted keys while InsertCount still equals the row count from before.
func (suite *SegmentSuite) TestDelete() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	pks, err := storage.GenInt64PrimaryKeys(0, 1)
	suite.NoError(err)

	// verify runs the delete and the before/after assertions on one segment.
	verify := func(seg Segment) {
		before := seg.RowNum()
		delErr := seg.Delete(ctx, pks, []uint64{1000, 1000})
		suite.NoError(delErr)
		suite.Equal(before-int64(len(pks)), seg.RowNum())
		suite.Equal(before, seg.InsertCount())
	}

	// Test for sealed
	verify(suite.sealed)
	// Test for growing
	verify(suite.growing)
}
// TestHasRawData asserts that both the growing and the sealed segment report
// raw data for the simple float-vector test field.
func (suite *SegmentSuite) TestHasRawData() {
	fieldID := simpleFloatVecField.id

	suite.True(suite.growing.HasRawData(fieldID))
	suite.True(suite.sealed.HasRawData(fieldID))
}
// TestLocation verifies that checking a primary key through precomputed hash
// locations (TestLocations) gives the same answer as the direct MayPkExist
// lookup on the sealed segment.
func (suite *SegmentSuite) TestLocation() {
	pk := storage.NewInt64PrimaryKey(100)

	// Compute the locations once, using the segment's own hash function count.
	hashFuncNum := suite.sealed.GetHashFuncNum()
	locations := storage.Locations(pk, hashFuncNum)

	viaLocations := suite.sealed.TestLocations(pk, locations)
	viaPk := suite.sealed.MayPkExist(pk)
	suite.Equal(viaLocations, viaPk)
}
// TestCASVersion exercises compare-and-swap on the sealed segment's version:
// a swap with a stale expected version must fail and leave the version
// untouched, and a swap with the current version must succeed and advance it.
func (suite *SegmentSuite) TestCASVersion() {
	seg := suite.sealed
	current := seg.Version()
	next := current + 1

	// Wrong expected version: rejected, version unchanged.
	rejected := seg.CASVersion(current-1, next)
	suite.False(rejected)
	suite.NotEqual(next, seg.Version())

	// Matching expected version: accepted, version advanced.
	accepted := seg.CASVersion(current, next)
	suite.True(accepted)
	suite.Equal(next, seg.Version())
}
// TestSegmentRemoveUnusedFieldFiles is an empty placeholder: it makes no
// assertions, so it only runs the SetupTest/TearDownTest fixture cycle.
// TODO: add coverage or remove.
func (suite *SegmentSuite) TestSegmentRemoveUnusedFieldFiles() {
}
// TestSegmentReleased releases the sealed segment and checks the state it
// exposes afterwards: the pointer lock can no longer be pinned, RowNum and
// MemSize are zero, and HasRawData reports false for field 101.
//
// NOTE(review): TearDownTest releases the same segment again, so this relies
// on Release tolerating a second call — confirm.
func (suite *SegmentSuite) TestSegmentReleased() {
	ctx := context.Background()
	suite.sealed.Release(ctx)

	local := suite.sealed.(*LocalSegment)

	suite.False(local.ptrLock.PinIfNotReleased())
	suite.EqualValues(0, local.RowNum())
	suite.EqualValues(0, local.MemSize())
	suite.False(local.HasRawData(101))
}
func TestSegment(t *testing.T) {
suite.Run(t, new(SegmentSuite))
}