mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
Add query node segment filter (#7303)
Signed-off-by: godchen <qingxiang.chen@zilliz.com>
This commit is contained in:
parent
2dc2cb1a28
commit
c333af0dcf
@ -13,6 +13,8 @@ package datanode
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/binary"
|
||||||
|
"errors"
|
||||||
|
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
@ -55,6 +57,27 @@ func (ddn *deleteNode) Operate(in []Msg) []Msg {
|
|||||||
return []Msg{}
|
return []Msg{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getSegmentsByPKs(pks []int64, segments []*Segment) (map[int64][]int64, error) {
|
||||||
|
if pks == nil {
|
||||||
|
return nil, errors.New("pks is nil when getSegmentsByPKs")
|
||||||
|
}
|
||||||
|
if segments == nil {
|
||||||
|
return nil, errors.New("segments is nil when getSegmentsByPKs")
|
||||||
|
}
|
||||||
|
results := make(map[int64][]int64)
|
||||||
|
buf := make([]byte, 8)
|
||||||
|
for _, segment := range segments {
|
||||||
|
for _, pk := range pks {
|
||||||
|
binary.BigEndian.PutUint64(buf, uint64(pk))
|
||||||
|
exist := segment.pkFilter.Test(buf)
|
||||||
|
if exist {
|
||||||
|
results[segment.segmentID] = append(results[segment.segmentID], pk)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
func newDeleteDNode(ctx context.Context, replica Replica) *deleteNode {
|
func newDeleteDNode(ctx context.Context, replica Replica) *deleteNode {
|
||||||
baseNode := BaseNode{}
|
baseNode := BaseNode{}
|
||||||
baseNode.SetMaxParallelism(Params.FlowGraphMaxQueueLength)
|
baseNode.SetMaxParallelism(Params.FlowGraphMaxQueueLength)
|
||||||
|
|||||||
@ -13,8 +13,10 @@ package datanode
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/binary"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/bits-and-blooms/bloom/v3"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -35,3 +37,53 @@ func TestFlowGraphDeleteNode_Operate_Invalid_Size(t *testing.T) {
|
|||||||
result := deleteNode.Operate([]Msg{Msg1, Msg2})
|
result := deleteNode.Operate([]Msg{Msg1, Msg2})
|
||||||
assert.Equal(t, len(result), 0)
|
assert.Equal(t, len(result), 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetSegmentsByPKs(t *testing.T) {
|
||||||
|
buf := make([]byte, 8)
|
||||||
|
filter1 := bloom.NewWithEstimates(1000000, 0.01)
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
binary.BigEndian.PutUint64(buf, uint64(i))
|
||||||
|
filter1.Add(buf)
|
||||||
|
}
|
||||||
|
filter2 := bloom.NewWithEstimates(1000000, 0.01)
|
||||||
|
for i := 3; i < 5; i++ {
|
||||||
|
binary.BigEndian.PutUint64(buf, uint64(i))
|
||||||
|
filter2.Add(buf)
|
||||||
|
}
|
||||||
|
segment1 := &Segment{
|
||||||
|
segmentID: 1,
|
||||||
|
pkFilter: filter1,
|
||||||
|
}
|
||||||
|
segment2 := &Segment{
|
||||||
|
segmentID: 2,
|
||||||
|
pkFilter: filter1,
|
||||||
|
}
|
||||||
|
segment3 := &Segment{
|
||||||
|
segmentID: 3,
|
||||||
|
pkFilter: filter1,
|
||||||
|
}
|
||||||
|
segment4 := &Segment{
|
||||||
|
segmentID: 4,
|
||||||
|
pkFilter: filter2,
|
||||||
|
}
|
||||||
|
segment5 := &Segment{
|
||||||
|
segmentID: 5,
|
||||||
|
pkFilter: filter2,
|
||||||
|
}
|
||||||
|
segments := []*Segment{segment1, segment2, segment3, segment4, segment5}
|
||||||
|
results, err := getSegmentsByPKs([]int64{0, 1, 2, 3, 4}, segments)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
expected := map[int64][]int64{
|
||||||
|
1: {0, 1, 2},
|
||||||
|
2: {0, 1, 2},
|
||||||
|
3: {0, 1, 2},
|
||||||
|
4: {3, 4},
|
||||||
|
5: {3, 4},
|
||||||
|
}
|
||||||
|
assert.Equal(t, expected, results)
|
||||||
|
|
||||||
|
_, err = getSegmentsByPKs(nil, segments)
|
||||||
|
assert.NotNil(t, err)
|
||||||
|
_, err = getSegmentsByPKs([]int64{0, 1, 2, 3, 4}, nil)
|
||||||
|
assert.NotNil(t, err)
|
||||||
|
}
|
||||||
|
|||||||
@ -1136,6 +1136,27 @@ func (q *queryCollection) retrieve(msg queryMsg) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getSegmentsByPKs(pks []int64, segments []*Segment) (map[int64][]int64, error) {
|
||||||
|
if pks == nil {
|
||||||
|
return nil, fmt.Errorf("pks is nil when getSegmentsByPKs")
|
||||||
|
}
|
||||||
|
if segments == nil {
|
||||||
|
return nil, fmt.Errorf("segments is nil when getSegmentsByPKs")
|
||||||
|
}
|
||||||
|
results := make(map[int64][]int64)
|
||||||
|
buf := make([]byte, 8)
|
||||||
|
for _, segment := range segments {
|
||||||
|
for _, pk := range pks {
|
||||||
|
binary.BigEndian.PutUint64(buf, uint64(pk))
|
||||||
|
exist := segment.pkFilter.Test(buf)
|
||||||
|
if exist {
|
||||||
|
results[segment.segmentID] = append(results[segment.segmentID], pk)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
func mergeRetrieveResults(dataArr []*segcorepb.RetrieveResults) (*segcorepb.RetrieveResults, error) {
|
func mergeRetrieveResults(dataArr []*segcorepb.RetrieveResults) (*segcorepb.RetrieveResults, error) {
|
||||||
var final *segcorepb.RetrieveResults
|
var final *segcorepb.RetrieveResults
|
||||||
for _, data := range dataArr {
|
for _, data := range dataArr {
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import (
|
|||||||
"math/rand"
|
"math/rand"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/bits-and-blooms/bloom/v3"
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
@ -128,3 +129,53 @@ func TestQueryCollection_withoutVChannel(t *testing.T) {
|
|||||||
historical.close()
|
historical.close()
|
||||||
streaming.close()
|
streaming.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetSegmentsByPKs(t *testing.T) {
|
||||||
|
buf := make([]byte, 8)
|
||||||
|
filter1 := bloom.NewWithEstimates(1000000, 0.01)
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
binary.BigEndian.PutUint64(buf, uint64(i))
|
||||||
|
filter1.Add(buf)
|
||||||
|
}
|
||||||
|
filter2 := bloom.NewWithEstimates(1000000, 0.01)
|
||||||
|
for i := 3; i < 5; i++ {
|
||||||
|
binary.BigEndian.PutUint64(buf, uint64(i))
|
||||||
|
filter2.Add(buf)
|
||||||
|
}
|
||||||
|
segment1 := &Segment{
|
||||||
|
segmentID: 1,
|
||||||
|
pkFilter: filter1,
|
||||||
|
}
|
||||||
|
segment2 := &Segment{
|
||||||
|
segmentID: 2,
|
||||||
|
pkFilter: filter1,
|
||||||
|
}
|
||||||
|
segment3 := &Segment{
|
||||||
|
segmentID: 3,
|
||||||
|
pkFilter: filter1,
|
||||||
|
}
|
||||||
|
segment4 := &Segment{
|
||||||
|
segmentID: 4,
|
||||||
|
pkFilter: filter2,
|
||||||
|
}
|
||||||
|
segment5 := &Segment{
|
||||||
|
segmentID: 5,
|
||||||
|
pkFilter: filter2,
|
||||||
|
}
|
||||||
|
segments := []*Segment{segment1, segment2, segment3, segment4, segment5}
|
||||||
|
results, err := getSegmentsByPKs([]int64{0, 1, 2, 3, 4}, segments)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
expected := map[int64][]int64{
|
||||||
|
1: {0, 1, 2},
|
||||||
|
2: {0, 1, 2},
|
||||||
|
3: {0, 1, 2},
|
||||||
|
4: {3, 4},
|
||||||
|
5: {3, 4},
|
||||||
|
}
|
||||||
|
assert.Equal(t, expected, results)
|
||||||
|
|
||||||
|
_, err = getSegmentsByPKs(nil, segments)
|
||||||
|
assert.NotNil(t, err)
|
||||||
|
_, err = getSegmentsByPKs([]int64{0, 1, 2, 3, 4}, nil)
|
||||||
|
assert.NotNil(t, err)
|
||||||
|
}
|
||||||
|
|||||||
@ -29,6 +29,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/bits-and-blooms/bloom/v3"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
@ -90,6 +91,8 @@ type Segment struct {
|
|||||||
|
|
||||||
vectorFieldMutex sync.RWMutex // guards vectorFieldInfos
|
vectorFieldMutex sync.RWMutex // guards vectorFieldInfos
|
||||||
vectorFieldInfos map[UniqueID]*VectorFieldInfo
|
vectorFieldInfos map[UniqueID]*VectorFieldInfo
|
||||||
|
|
||||||
|
pkFilter *bloom.BloomFilter // bloom filter of pk inside a segment
|
||||||
}
|
}
|
||||||
|
|
||||||
//-------------------------------------------------------------------------------------- common interfaces
|
//-------------------------------------------------------------------------------------- common interfaces
|
||||||
|
|||||||
@ -1,29 +0,0 @@
|
|||||||
package segmentfilter
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/bits-and-blooms/bloom/v3"
|
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
||||||
)
|
|
||||||
|
|
||||||
// SegmentFilter is used to know which segments may have data corresponding
|
|
||||||
// to the primary key
|
|
||||||
type SegmentFilter struct {
|
|
||||||
segmentInfos []*datapb.SegmentInfo
|
|
||||||
bloomFilters []*bloom.BloomFilter
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewSegmentFilter(segmentInfos []*datapb.SegmentInfo) *SegmentFilter {
|
|
||||||
return &SegmentFilter{
|
|
||||||
segmentInfos: segmentInfos,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sf *SegmentFilter) init() {
|
|
||||||
panic("This method has not been implemented")
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetSegmentByPK takes a list of primary keys and returns a map of
|
|
||||||
// <segmentID, []string{primary_key}>
|
|
||||||
func (sf *SegmentFilter) GetSegmentByPK(pk []string) map[int64][]string {
|
|
||||||
panic("This method has not been implemented")
|
|
||||||
}
|
|
||||||
Loading…
x
Reference in New Issue
Block a user