From 3cc8ee298e2b0fa27daa28abc31dcb9899be944f Mon Sep 17 00:00:00 2001 From: dragondriver Date: Tue, 27 Jul 2021 10:19:21 +0800 Subject: [PATCH] Filter NaN when reducing search results (#6806) Signed-off-by: dragondriver --- internal/proxy/task.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/internal/proxy/task.go b/internal/proxy/task.go index 86c1687633..ba53b1a63a 100644 --- a/internal/proxy/task.go +++ b/internal/proxy/task.go @@ -1698,6 +1698,10 @@ func reduceSearchResultDataParallel(searchResultData []*schemapb.SearchResultDat continue } distance := searchResultData[q].Scores[idx*topk+loc] + // https://github.com/milvus-io/milvus/issues/6781 + if math.IsNaN(float64(distance)) { + continue + } if distance > maxDistance || (math.Abs(float64(distance-maxDistance)) < math.SmallestNonzeroFloat32 && choice != q) { choice = q maxDistance = distance @@ -1710,7 +1714,12 @@ func reduceSearchResultDataParallel(searchResultData []*schemapb.SearchResultDat choiceOffset := locs[choice] // check if distance is valid, `invalid` here means very very big, // in this process, distance here is the smallest, so the rest of distance are all invalid - if searchResultData[choice].Scores[idx*topk+choiceOffset] <= minFloat32 { + // https://github.com/milvus-io/milvus/issues/6781 + // the tanimoto distance between two binary vectors may be -inf, so a -inf distance must not be filtered out, + // otherwise the number of hit records would be less than needed (topk). + // NaN distances have already been filtered out in the selection loop above. + distance := searchResultData[choice].Scores[idx*topk+choiceOffset] + if distance < minFloat32 { break } curIdx := idx*topk + choiceOffset