mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
feat: Enable more VECTOR_INT8 unittest (#39569)
Issue: #38666 Signed-off-by: Cai Yudong <yudong.cai@zilliz.com>
This commit is contained in:
parent
c84a0748c4
commit
5730b69e56
@ -359,7 +359,7 @@ CreateVectorDataArray(int64_t count, const FieldMeta& field_meta) {
|
||||
case DataType::VECTOR_INT8: {
|
||||
auto length = count * dim;
|
||||
auto obj = vector_array->mutable_int8_vector();
|
||||
obj->resize(length * sizeof(int8));
|
||||
obj->resize(length);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "common/Types.h"
|
||||
#include "knowhere/comp/index_param.h"
|
||||
@ -71,6 +72,8 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
|
||||
"bf16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type);
|
||||
auto sparse_vec = schema->AddDebugField(
|
||||
"sparse_vec", DataType::VECTOR_SPARSE_FLOAT, 128, metric_type);
|
||||
auto int8_vec = schema->AddDebugField(
|
||||
"int8_vec", DataType::VECTOR_INT8, 128, metric_type);
|
||||
schema->set_primary_field_id(int64_field);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
@ -136,6 +139,8 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
|
||||
segment->bulk_subscript(bf16_vec, ids_ds->GetIds(), num_inserted);
|
||||
auto sparse_vec_result =
|
||||
segment->bulk_subscript(sparse_vec, ids_ds->GetIds(), num_inserted);
|
||||
auto int8_vec_result =
|
||||
segment->bulk_subscript(int8_vec, ids_ds->GetIds(), num_inserted);
|
||||
|
||||
EXPECT_EQ(bool_result->scalars().bool_data().data_size(), num_inserted);
|
||||
EXPECT_EQ(int8_result->scalars().int_data().data_size(), num_inserted);
|
||||
@ -159,6 +164,8 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
|
||||
EXPECT_EQ(
|
||||
sparse_vec_result->vectors().sparse_float_vector().contents_size(),
|
||||
num_inserted);
|
||||
EXPECT_EQ(int8_vec_result->vectors().int8_vector().size(),
|
||||
num_inserted * dim);
|
||||
EXPECT_EQ(int_array_result->scalars().array_data().data_size(),
|
||||
num_inserted);
|
||||
EXPECT_EQ(long_array_result->scalars().array_data().data_size(),
|
||||
@ -184,24 +191,33 @@ TEST_F(ChunkVectorTest, FillDataWithMmap) {
|
||||
.data();
|
||||
auto sparse_vec_res = SparseBytesToRows(
|
||||
sparse_vec_result->vectors().sparse_float_vector().contents());
|
||||
auto int8_vec_res = (int8*)int8_vec_result.get()
|
||||
->mutable_vectors()
|
||||
->int8_vector()
|
||||
.data();
|
||||
EXPECT_TRUE(fp32_vec_res.size() == num_inserted * dim);
|
||||
auto fp32_vec_gt = dataset.get_col<float>(fp32_vec);
|
||||
auto fp16_vec_gt = dataset.get_col<float16>(fp16_vec);
|
||||
auto bf16_vec_gt = dataset.get_col<bfloat16>(bf16_vec);
|
||||
auto sparse_vec_gt =
|
||||
dataset.get_col<knowhere::sparse::SparseRow<float>>(sparse_vec);
|
||||
auto int8_vec_gt = dataset.get_col<int8>(int8_vec);
|
||||
|
||||
for (size_t i = 0; i < num_inserted; ++i) {
|
||||
auto id = ids_ds->GetIds()[i];
|
||||
// check dense vector
|
||||
for (size_t j = 0; j < 128; ++j) {
|
||||
EXPECT_TRUE(fp32_vec_res[i * dim + j] ==
|
||||
fp32_vec_gt[(id % per_batch) * dim + j]);
|
||||
EXPECT_TRUE(fp16_vec_res[i * dim + j] ==
|
||||
fp16_vec_gt[(id % per_batch) * dim + j]);
|
||||
EXPECT_TRUE(bf16_vec_res[i * dim + j] ==
|
||||
bf16_vec_gt[(id % per_batch) * dim + j]);
|
||||
}
|
||||
EXPECT_TRUE(memcmp((void*)(&fp32_vec_res[i * dim]),
|
||||
(void*)(&fp32_vec_gt[(id % per_batch) * dim]),
|
||||
sizeof(float) * dim) == 0);
|
||||
EXPECT_TRUE(memcmp((void*)(&fp16_vec_res[i * dim]),
|
||||
(void*)(&fp16_vec_gt[(id % per_batch) * dim]),
|
||||
sizeof(float16) * dim) == 0);
|
||||
EXPECT_TRUE(memcmp((void*)(&bf16_vec_res[i * dim]),
|
||||
(void*)(&bf16_vec_gt[(id % per_batch) * dim]),
|
||||
sizeof(bfloat16) * dim) == 0);
|
||||
EXPECT_TRUE(memcmp((void*)(&int8_vec_res[i * dim]),
|
||||
(void*)(&int8_vec_gt[(id % per_batch) * dim]),
|
||||
sizeof(int8) * dim) == 0);
|
||||
//check sparse vector
|
||||
auto actual_row = sparse_vec_res[i];
|
||||
auto expected_row = sparse_vec_gt[(id % per_batch)];
|
||||
|
||||
@ -49,6 +49,8 @@ class IndexLoadTest : public ::testing::TestWithParam<Param> {
|
||||
data_type = milvus::DataType::VECTOR_BINARY;
|
||||
} else if (field_type == "vector_sparse_float") {
|
||||
data_type = milvus::DataType::VECTOR_SPARSE_FLOAT;
|
||||
} else if (field_type == "vector_int8") {
|
||||
data_type = milvus::DataType::VECTOR_INT8;
|
||||
} else if (field_type == "array") {
|
||||
data_type = milvus::DataType::ARRAY;
|
||||
} else {
|
||||
@ -106,6 +108,22 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
{"mmap", "true"},
|
||||
{"field_type", "vector_fp16"}},
|
||||
{0.125f, 1.0f, 0.0f, 1.0f, true}),
|
||||
std::pair<std::map<std::string, std::string>, LoadResourceRequest>(
|
||||
{{"index_type", "HNSW"},
|
||||
{"metric_type", "L2"},
|
||||
{"efConstrcution", "300"},
|
||||
{"M", "30"},
|
||||
{"mmap", "false"},
|
||||
{"field_type", "vector_int8"}},
|
||||
{2.0f, 0.0f, 1.0f, 0.0f, true}),
|
||||
std::pair<std::map<std::string, std::string>, LoadResourceRequest>(
|
||||
{{"index_type", "HNSW"},
|
||||
{"metric_type", "L2"},
|
||||
{"efConstrcution", "300"},
|
||||
{"M", "30"},
|
||||
{"mmap", "true"},
|
||||
{"field_type", "vector_int8"}},
|
||||
{0.125f, 1.0f, 0.0f, 1.0f, true}),
|
||||
std::pair<std::map<std::string, std::string>, LoadResourceRequest>(
|
||||
{{"index_type", "IVFFLAT"},
|
||||
{"metric_type", "L2"},
|
||||
|
||||
@ -2197,6 +2197,8 @@ TEST(Sealed, QueryAllFields) {
|
||||
"float16_vec", DataType::VECTOR_FLOAT16, 128, metric_type);
|
||||
auto bfloat16_vec = schema->AddDebugField(
|
||||
"bfloat16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type);
|
||||
auto int8_vec = schema->AddDebugField(
|
||||
"int8_vec", DataType::VECTOR_INT8, 128, metric_type);
|
||||
schema->set_primary_field_id(int64_field);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
@ -2235,6 +2237,7 @@ TEST(Sealed, QueryAllFields) {
|
||||
auto vector_values = dataset.get_col<float>(vec);
|
||||
auto float16_vector_values = dataset.get_col<uint8_t>(float16_vec);
|
||||
auto bfloat16_vector_values = dataset.get_col<uint8_t>(bfloat16_vec);
|
||||
auto int8_vector_values = dataset.get_col<int8>(int8_vec);
|
||||
|
||||
auto ids_ds = GenRandomIds(dataset_size);
|
||||
auto bool_result =
|
||||
@ -2273,6 +2276,8 @@ TEST(Sealed, QueryAllFields) {
|
||||
segment->bulk_subscript(float16_vec, ids_ds->GetIds(), dataset_size);
|
||||
auto bfloat16_vec_result =
|
||||
segment->bulk_subscript(bfloat16_vec, ids_ds->GetIds(), dataset_size);
|
||||
auto int8_vec_result =
|
||||
segment->bulk_subscript(int8_vec, ids_ds->GetIds(), dataset_size);
|
||||
|
||||
EXPECT_EQ(bool_result->scalars().bool_data().data_size(), dataset_size);
|
||||
EXPECT_EQ(int8_result->scalars().int_data().data_size(), dataset_size);
|
||||
@ -2290,6 +2295,8 @@ TEST(Sealed, QueryAllFields) {
|
||||
dataset_size * dim * 2);
|
||||
EXPECT_EQ(bfloat16_vec_result->vectors().bfloat16_vector().size(),
|
||||
dataset_size * dim * 2);
|
||||
EXPECT_EQ(int8_vec_result->vectors().int8_vector().size(),
|
||||
dataset_size * dim);
|
||||
EXPECT_EQ(int_array_result->scalars().array_data().data_size(),
|
||||
dataset_size);
|
||||
EXPECT_EQ(long_array_result->scalars().array_data().data_size(),
|
||||
|
||||
@ -429,6 +429,7 @@ inline GeneratedData DataGen(SchemaPtr schema,
|
||||
case DataType::VECTOR_INT8: {
|
||||
auto dim = field_meta.get_dim();
|
||||
vector<int8> final(dim * N);
|
||||
srand(seed);
|
||||
for (auto& x : final) {
|
||||
x = int8_t(rand() % 256 - 128);
|
||||
}
|
||||
|
||||
@ -191,6 +191,8 @@ func CreateSearchPlan(schema *typeutil.SchemaHelper, exprStr string, vectorField
|
||||
vectorType = planpb.VectorType_BFloat16Vector
|
||||
case schemapb.DataType_SparseFloatVector:
|
||||
vectorType = planpb.VectorType_SparseFloatVector
|
||||
case schemapb.DataType_Int8Vector:
|
||||
vectorType = planpb.VectorType_Int8Vector
|
||||
default:
|
||||
log.Error("Invalid dataType", zap.Any("dataType", dataType))
|
||||
return nil, err
|
||||
|
||||
@ -459,6 +459,13 @@ var serdeMap = func() map[schemapb.DataType]serdeEntry {
|
||||
fixedSizeDeserializer,
|
||||
fixedSizeSerializer,
|
||||
}
|
||||
m[schemapb.DataType_Int8Vector] = serdeEntry{
|
||||
func(i int) arrow.DataType {
|
||||
return &arrow.FixedSizeBinaryType{ByteWidth: i}
|
||||
},
|
||||
fixedSizeDeserializer,
|
||||
fixedSizeSerializer,
|
||||
}
|
||||
m[schemapb.DataType_FloatVector] = serdeEntry{
|
||||
func(i int) arrow.DataType {
|
||||
return &arrow.FixedSizeBinaryType{ByteWidth: i * 4}
|
||||
|
||||
@ -132,6 +132,8 @@ func (s *TestGetVectorSuite) run() {
|
||||
vecFieldData = integration.NewBFloat16VectorFieldData(vecFieldName, NB, dim)
|
||||
} else if typeutil.IsSparseFloatVectorType(s.vecType) {
|
||||
vecFieldData = integration.NewSparseFloatVectorFieldData(vecFieldName, NB)
|
||||
} else if s.vecType == schemapb.DataType_Int8Vector {
|
||||
vecFieldData = integration.NewInt8VectorFieldData(vecFieldName, NB, dim)
|
||||
} else {
|
||||
vecFieldData = integration.NewBinaryVectorFieldData(vecFieldName, NB, dim)
|
||||
}
|
||||
@ -294,6 +296,26 @@ func (s *TestGetVectorSuite) run() {
|
||||
s.Require().Equal(rawData[id], resData[i])
|
||||
}
|
||||
}
|
||||
} else if s.vecType == schemapb.DataType_Int8Vector {
|
||||
s.Require().Len(result.GetFieldsData()[vecFieldIndex].GetVectors().GetInt8Vector(), nq*topk*dim)
|
||||
rawData := vecFieldData.GetVectors().GetInt8Vector()
|
||||
resData := result.GetFieldsData()[vecFieldIndex].GetVectors().GetInt8Vector()
|
||||
rowBytes := dim
|
||||
if s.pkType == schemapb.DataType_Int64 {
|
||||
for i, id := range result.GetIds().GetIntId().GetData() {
|
||||
expect := rawData[int(id)*rowBytes : (int(id)+1)*rowBytes]
|
||||
actual := resData[i*rowBytes : (i+1)*rowBytes]
|
||||
s.Require().ElementsMatch(expect, actual)
|
||||
}
|
||||
} else {
|
||||
for i, idStr := range result.GetIds().GetStrId().GetData() {
|
||||
id, err := strconv.Atoi(idStr)
|
||||
s.Require().NoError(err)
|
||||
expect := rawData[id*rowBytes : (id+1)*rowBytes]
|
||||
actual := resData[i*rowBytes : (i+1)*rowBytes]
|
||||
s.Require().ElementsMatch(expect, actual)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s.Require().Len(result.GetFieldsData()[vecFieldIndex].GetVectors().GetBinaryVector(), nq*topk*dim/8)
|
||||
rawData := vecFieldData.GetVectors().GetBinaryVector()
|
||||
@ -448,6 +470,16 @@ func (s *TestGetVectorSuite) TestGetVector_BFloat16Vector() {
|
||||
s.run()
|
||||
}
|
||||
|
||||
func (s *TestGetVectorSuite) TestGetVector_Int8Vector() {
|
||||
s.nq = 10
|
||||
s.topK = 10
|
||||
s.indexType = integration.IndexHNSW
|
||||
s.metricType = metric.L2
|
||||
s.pkType = schemapb.DataType_Int64
|
||||
s.vecType = schemapb.DataType_Int8Vector
|
||||
s.run()
|
||||
}
|
||||
|
||||
func (s *TestGetVectorSuite) TestGetVector_Big_NQ_TOPK() {
|
||||
s.T().Skip("skip big NQ Top due to timeout")
|
||||
s.nq = 10000
|
||||
|
||||
@ -244,6 +244,11 @@ func (s *BulkInsertSuite) TestMultiFileTypes() {
|
||||
s.metricType = metric.L2
|
||||
s.run()
|
||||
|
||||
s.vecType = schemapb.DataType_Int8Vector
|
||||
s.indexType = "HNSW"
|
||||
s.metricType = metric.L2
|
||||
s.run()
|
||||
|
||||
// TODO: numpy files do not support SparseFloatVector yet
|
||||
if fileType != importutilv2.Numpy {
|
||||
s.vecType = schemapb.DataType_SparseFloatVector
|
||||
|
||||
@ -174,6 +174,17 @@ func GenerateNumpyFiles(cm storage.ChunkManager, schema *schemapb.CollectionSche
|
||||
data = chunkedRows
|
||||
case schemapb.DataType_SparseFloatVector:
|
||||
data = insertData.Data[fieldID].(*storage.SparseFloatVectorFieldData).GetContents()
|
||||
case schemapb.DataType_Int8Vector:
|
||||
rows := insertData.Data[fieldID].GetDataRows().([]int8)
|
||||
if dim != fieldData.(*storage.Int8VectorFieldData).Dim {
|
||||
panic(fmt.Sprintf("dim mis-match: %d, %d", dim, fieldData.(*storage.Int8VectorFieldData).Dim))
|
||||
}
|
||||
chunked := lo.Chunk(rows, dim)
|
||||
chunkedRows := make([][dim]int8, len(chunked))
|
||||
for i, innerSlice := range chunked {
|
||||
copy(chunkedRows[i][:], innerSlice)
|
||||
}
|
||||
data = chunkedRows
|
||||
default:
|
||||
data = insertData.Data[fieldID].GetDataRows()
|
||||
}
|
||||
|
||||
@ -155,6 +155,10 @@ func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.Fiel
|
||||
return testutils.NewSparseFloatVectorFieldData(fieldName, numRows)
|
||||
}
|
||||
|
||||
func NewInt8VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData {
|
||||
return testutils.NewInt8VectorFieldData(fieldName, numRows, dim)
|
||||
}
|
||||
|
||||
func GenerateInt64Array(numRows int, start int64) []int64 {
|
||||
ret := make([]int64, numRows)
|
||||
for i := 0; i < numRows; i++ {
|
||||
|
||||
@ -34,6 +34,7 @@ import (
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
|
||||
"github.com/milvus-io/milvus/pkg/util/testutils"
|
||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -328,6 +329,13 @@ func constructPlaceholderGroup(nq, dim int, vectorType schemapb.DataType) *commo
|
||||
placeholderType = commonpb.PlaceholderType_SparseFloatVector
|
||||
sparseVecs := GenerateSparseFloatArray(nq)
|
||||
values = append(values, sparseVecs.Contents...)
|
||||
case schemapb.DataType_Int8Vector:
|
||||
placeholderType = commonpb.PlaceholderType_Int8Vector
|
||||
data := testutils.GenerateInt8Vectors(nq, dim)
|
||||
for i := 0; i < nq; i++ {
|
||||
rowBytes := dim
|
||||
values = append(values, typeutil.Int8ArrayToBytes(data[rowBytes*i:rowBytes*(i+1)]))
|
||||
}
|
||||
default:
|
||||
panic("invalid vector data type")
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user