From 243346247bfdede87d398da7cb7c0eda4cd8ee56 Mon Sep 17 00:00:00 2001 From: dragondriver Date: Tue, 29 Dec 2020 16:31:03 +0800 Subject: [PATCH] Add unit test cases used for regression testing of the index builder Signed-off-by: dragondriver --- .../core/src/indexbuilder/IndexWrapper.cpp | 2 - internal/core/src/indexbuilder/index_c.cpp | 7 - internal/core/unittest/test_index_wrapper.cpp | 239 ++++++++---------- internal/indexbuilder/index.go | 19 +- internal/indexbuilder/index_test.go | 169 +++++++++++-- internal/proxy/validate_util.go | 2 +- 6 files changed, 260 insertions(+), 178 deletions(-) diff --git a/internal/core/src/indexbuilder/IndexWrapper.cpp b/internal/core/src/indexbuilder/IndexWrapper.cpp index ed37ec5a04..f23fcbd945 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.cpp +++ b/internal/core/src/indexbuilder/IndexWrapper.cpp @@ -25,8 +25,6 @@ namespace indexbuilder { IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* serialized_index_params) { type_params_ = std::string(serialized_type_params); index_params_ = std::string(serialized_index_params); - // std::cout << "type_params_.size(): " << type_params_.size() << std::endl; - // std::cout << "index_params_.size(): " << index_params_.size() << std::endl; parse(); diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 9adf504e33..b21dd7cd05 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -28,13 +28,6 @@ class CGODebugUtils { CIndex CreateIndex(const char* serialized_type_params, const char* serialized_index_params) { - // std::cout << "strlen(serialized_type_params): " << CGODebugUtils::Strlen(serialized_type_params, - // type_params_size) - // << std::endl; - // std::cout << "type_params_size: " << type_params_size << std::endl; - // std::cout << "strlen(serialized_index_params): " - // << CGODebugUtils::Strlen(serialized_index_params, index_params_size) << std::endl; - 
// std::cout << "index_params_size: " << index_params_size << std::endl; auto index = std::make_unique(serialized_type_params, serialized_index_params); return index.release(); diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index ffbc340f86..74eed0418e 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "pb/index_cgo_msg.pb.h" #include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h" @@ -116,9 +117,9 @@ class IndexWrapperTest : public ::testing::TestWithParam { is_binary = is_binary_map[index_type]; bool ok; - ok = type_params.SerializeToString(&type_params_str); + ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str); assert(ok); - ok = index_params.SerializeToString(&index_params_str); + ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); auto dataset = GenDataset(NB, metric_type, is_binary); @@ -190,99 +191,71 @@ TEST(BINIDMAP, Build) { ASSERT_NO_THROW(index->BuildAll(xb_dataset, conf)); } -// TEST(PQWrapper, Build) { -// auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ; -// auto metric_type = milvus::knowhere::Metric::L2; -// indexcgo::TypeParams type_params; -// indexcgo::IndexParams index_params; -// std::tie(type_params, index_params) = generate_params(index_type, metric_type); -// std::string type_params_str, index_params_str; -// bool ok; -// ok = type_params.SerializeToString(&type_params_str); -// assert(ok); -// ok = index_params.SerializeToString(&index_params_str); -// assert(ok); -// auto dataset = GenDataset(NB, metric_type, false); -// auto xb_data = dataset.get_col(0); -// auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data()); -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), 
index_params_str.size()); -// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); -//} +TEST(PQWrapper, Build) { + auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ; + auto metric_type = milvus::knowhere::Metric::L2; + indexcgo::TypeParams type_params; + indexcgo::IndexParams index_params; + std::tie(type_params, index_params) = generate_params(index_type, metric_type); + std::string type_params_str, index_params_str; + bool ok; + ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str); + assert(ok); + ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); + assert(ok); + auto dataset = GenDataset(NB, metric_type, false); + auto xb_data = dataset.get_col(0); + auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data()); + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); +} -// TEST(PQCGO, Params) { -// std::vector type_params; -// std::vector index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109, -// 101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10, -// 10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95, -// 80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110, -// 108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52}; -// auto index = std::make_unique(type_params.data(), type_params.size(), -// index_params.data(), index_params.size()); -// -// auto dim = index->dim(); -// auto dataset = GenDataset(NB, METRIC_TYPE, false, dim); -// auto xb_data = dataset.get_col(0); -// auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data()); -// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); -//} +TEST(BinFlatWrapper, Build) { + auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT; + auto metric_type = milvus::knowhere::Metric::JACCARD; + indexcgo::TypeParams 
type_params; + indexcgo::IndexParams index_params; + std::tie(type_params, index_params) = generate_params(index_type, metric_type); + std::string type_params_str, index_params_str; + bool ok; + ok = google::protobuf::TextFormat::PrintToString(type_params, &type_params_str); + assert(ok); + ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); + assert(ok); + auto dataset = GenDataset(NB, metric_type, true); + auto xb_data = dataset.get_col(0); + std::vector ids(NB, 0); + std::iota(ids.begin(), ids.end(), 0); + auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data()); + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); + ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); +} -// TEST(PQCGOWrapper, Params) { -// std::vector type_params; -// std::vector index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109, -// 101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10, -// 10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95, -// 80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110, -// 108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52}; -// auto index = CreateIndex(type_params.data(), type_params.size(), index_params.data(), index_params.size()); -// DeleteIndex(index); -//} - -// TEST(BinFlatWrapper, Build) { -// auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT; -// auto metric_type = milvus::knowhere::Metric::JACCARD; -// indexcgo::TypeParams type_params; -// indexcgo::IndexParams index_params; -// std::tie(type_params, index_params) = generate_params(index_type, metric_type); -// std::string type_params_str, index_params_str; -// bool ok; -// ok = type_params.SerializeToString(&type_params_str); -// assert(ok); -// ok = index_params.SerializeToString(&index_params_str); -// assert(ok); -// auto 
dataset = GenDataset(NB, metric_type, true); -// auto xb_data = dataset.get_col(0); -// std::vector ids(NB, 0); -// std::iota(ids.begin(), ids.end(), 0); -// auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data()); -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -// ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); -// ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); -//} - -// TEST(BinIdMapWrapper, Build) { -// auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; -// auto metric_type = milvus::knowhere::Metric::JACCARD; -// indexcgo::TypeParams type_params; -// indexcgo::IndexParams index_params; -// std::tie(type_params, index_params) = generate_params(index_type, metric_type); -// std::string type_params_str, index_params_str; -// bool ok; -// ok = type_params.SerializeToString(&type_params_str); -// assert(ok); -// ok = index_params.SerializeToString(&index_params_str); -// assert(ok); -// auto dataset = GenDataset(NB, metric_type, true); -// auto xb_data = dataset.get_col(0); -// std::vector ids(NB, 0); -// std::iota(ids.begin(), ids.end(), 0); -// auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data()); -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); -// ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); -//} +TEST(BinIdMapWrapper, Build) { + auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; + auto metric_type = milvus::knowhere::Metric::JACCARD; + indexcgo::TypeParams type_params; + indexcgo::IndexParams index_params; + std::tie(type_params, index_params) = generate_params(index_type, metric_type); + std::string type_params_str, index_params_str; + bool ok; + ok = 
google::protobuf::TextFormat::PrintToString(type_params, &type_params_str); + assert(ok); + ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); + assert(ok); + auto dataset = GenDataset(NB, metric_type, true); + auto xb_data = dataset.get_col(0); + std::vector ids(NB, 0); + std::iota(ids.begin(), ids.end(), 0); + auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data()); + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); + ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); +} INSTANTIATE_TEST_CASE_P(IndexTypeParameters, IndexWrapperTest, @@ -293,46 +266,46 @@ INSTANTIATE_TEST_CASE_P(IndexTypeParameters, std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, milvus::knowhere::Metric::JACCARD))); -// TEST_P(IndexWrapperTest, Constructor) { -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -//} +TEST_P(IndexWrapperTest, Constructor) { + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); +} -// TEST_P(IndexWrapperTest, Dim) { -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -// -// ASSERT_EQ(index->dim(), DIM); -//} +TEST_P(IndexWrapperTest, Dim) { + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); -// TEST_P(IndexWrapperTest, BuildWithoutIds) { -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -// -// if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { -// ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); -// } else { -// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); -// } -//} + ASSERT_EQ(index->dim(), DIM); +} -// TEST_P(IndexWrapperTest, 
Codec) { -// auto index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -// -// if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { -// ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); -// ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); -// } else { -// ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); -// } -// -// auto binary = index->Serialize(); -// auto copy_index = std::make_unique( -// type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); -// ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size)); -// ASSERT_EQ(copy_index->dim(), copy_index->dim()); -// auto copy_binary = copy_index->Serialize(); -// ASSERT_EQ(binary.size, copy_binary.size); -// ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0); -//} +TEST_P(IndexWrapperTest, BuildWithoutIds) { + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + + if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { + ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); + } else { + ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); + } +} + +TEST_P(IndexWrapperTest, Codec) { + auto index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + + if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { + ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); + ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); + } else { + ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); + } + + auto binary = index->Serialize(); + auto copy_index = + std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size)); + ASSERT_EQ(copy_index->dim(), copy_index->dim()); + auto copy_binary = copy_index->Serialize(); + ASSERT_EQ(binary.size, copy_binary.size); + ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0); +} diff --git 
a/internal/indexbuilder/index.go b/internal/indexbuilder/index.go index 0ed411df4d..f79543a422 100644 --- a/internal/indexbuilder/index.go +++ b/internal/indexbuilder/index.go @@ -123,30 +123,13 @@ func NewCIndex(typeParams, indexParams map[string]string) (Index, error) { } indexParamsStr := proto.MarshalTextString(protoIndexParams) - //print := func(param []byte) { - // for i, c := range param { - // fmt.Print(c) - // fmt.Print(", ") - // if i % 25 == 0 { - // fmt.Println() - // } - // } - // fmt.Println() - //} - //print(typeParamsStr) - //fmt.Println("len(typeParamsStr): ", len(typeParamsStr)) - //print(indexParamsStr) - //fmt.Println("len(indexParamsStr): ", len(indexParamsStr)) - typeParamsPointer := C.CString(typeParamsStr) indexParamsPointer := C.CString(indexParamsStr) /* CIndex CreateIndex(const char* serialized_type_params, - int64_t type_params_size, - const char* serialized_index_params - int64_t index_params_size); + const char* serialized_index_params); */ return &CIndex{ indexPtr: C.CreateIndex(typeParamsPointer, indexParamsPointer), diff --git a/internal/indexbuilder/index_test.go b/internal/indexbuilder/index_test.go index a5e5eb2ca5..3ece0330d7 100644 --- a/internal/indexbuilder/index_test.go +++ b/internal/indexbuilder/index_test.go @@ -2,33 +2,168 @@ package indexbuilder import ( "github.com/stretchr/testify/assert" + "math/rand" "strconv" "testing" ) const ( - indexType = "IVF_PQ" - dim = 8 - nlist = 100 - m = 4 - nbits = 8 - metricType = "L2" + IvfPq = "IVF_PQ" + BinFlat = "BIN_FLAT" + dim = 8 + nlist = 100 + m = 4 + nbits = 8 + L2 = "L2" + Jaccard = "JACCARD" + nb = 8 * 10000 ) -func TestIndex_New(t *testing.T) { +type testCase struct { + indexType string + metricType string + isBinary bool +} + +func generateFloatVectorTestCases() []testCase { + return []testCase{ + {IvfPq, L2, false}, + } +} + +func generateBinaryVectorTestCases() []testCase { + return []testCase{ + {BinFlat, Jaccard, true}, + } +} + +func generateTestCases() []testCase { + 
return append(generateFloatVectorTestCases(), generateBinaryVectorTestCases()...) +} + +func generateParams(indexType, metricType string) (map[string]string, map[string]string) { typeParams := make(map[string]string) indexParams := make(map[string]string) indexParams["index_type"] = indexType - indexParams["dim"] = strconv.Itoa(dim) - indexParams["nlist"] = strconv.Itoa(nlist) - indexParams["m"] = strconv.Itoa(m) - indexParams["nbits"] = strconv.Itoa(nbits) indexParams["metric_type"] = metricType + if indexType == IvfPq { + indexParams["dim"] = strconv.Itoa(dim) + indexParams["nlist"] = strconv.Itoa(nlist) + indexParams["m"] = strconv.Itoa(m) + indexParams["nbits"] = strconv.Itoa(nbits) + } else if indexType == BinFlat { + indexParams["dim"] = strconv.Itoa(dim) + } - index, err := NewCIndex(typeParams, indexParams) - assert.Equal(t, err, nil) - assert.NotEqual(t, index, nil) - - err = index.Delete() - assert.Equal(t, err, nil) + return typeParams, indexParams +} + +func generateFloatVectors() []float32 { + vectors := make([]float32, 0) + for i := 0; i < nb; i++ { + vectors = append(vectors, rand.Float32()) + } + return vectors +} + +func generateBinaryVectors() []byte { + vectors := make([]byte, 0) + for i := 0; i < nb/8; i++ { + vectors = append(vectors, byte(rand.Intn(8))) + } + return vectors +} + +func TestCIndex_New(t *testing.T) { + for _, c := range generateTestCases() { + typeParams, indexParams := generateParams(c.indexType, c.metricType) + + index, err := NewCIndex(typeParams, indexParams) + assert.Equal(t, err, nil) + assert.NotEqual(t, index, nil) + + err = index.Delete() + assert.Equal(t, err, nil) + } +} + +func TestCIndex_BuildFloatVecIndexWithoutIds(t *testing.T) { + for _, c := range generateFloatVectorTestCases() { + typeParams, indexParams := generateParams(c.indexType, c.metricType) + + index, err := NewCIndex(typeParams, indexParams) + assert.Equal(t, err, nil) + assert.NotEqual(t, index, nil) + + vectors := generateFloatVectors() + err = 
index.BuildFloatVecIndexWithoutIds(vectors) + assert.Equal(t, err, nil) + + err = index.Delete() + assert.Equal(t, err, nil) + } +} + +func TestCIndex_BuildBinaryVecIndexWithoutIds(t *testing.T) { + for _, c := range generateBinaryVectorTestCases() { + typeParams, indexParams := generateParams(c.indexType, c.metricType) + + index, err := NewCIndex(typeParams, indexParams) + assert.Equal(t, err, nil) + assert.NotEqual(t, index, nil) + + vectors := generateBinaryVectors() + err = index.BuildBinaryVecIndexWithoutIds(vectors) + assert.Equal(t, err, nil) + + err = index.Delete() + assert.Equal(t, err, nil) + } +} + +func TestCIndex_Codec(t *testing.T) { + for _, c := range generateTestCases() { + typeParams, indexParams := generateParams(c.indexType, c.metricType) + + index, err := NewCIndex(typeParams, indexParams) + assert.Equal(t, err, nil) + assert.NotEqual(t, index, nil) + + if !c.isBinary { + vectors := generateFloatVectors() + err = index.BuildFloatVecIndexWithoutIds(vectors) + assert.Equal(t, err, nil) + } else { + vectors := generateBinaryVectors() + err = index.BuildBinaryVecIndexWithoutIds(vectors) + assert.Equal(t, err, nil) + } + + blobs, err := index.Serialize() + assert.Equal(t, err, nil) + + copyIndex, err := NewCIndex(typeParams, indexParams) + err = copyIndex.Load(blobs) + assert.Equal(t, err, nil) + copyBlobs, err := copyIndex.Serialize() + assert.Equal(t, err, nil) + assert.Equal(t, len(blobs), len(copyBlobs)) + // TODO: check key, value and more + + err = index.Delete() + assert.Equal(t, err, nil) + } +} + +func TestCIndex_Delete(t *testing.T) { + for _, c := range generateTestCases() { + typeParams, indexParams := generateParams(c.indexType, c.metricType) + + index, err := NewCIndex(typeParams, indexParams) + assert.Equal(t, err, nil) + assert.NotEqual(t, index, nil) + + err = index.Delete() + assert.Equal(t, err, nil) + } } diff --git a/internal/proxy/validate_util.go b/internal/proxy/validate_util.go index a094d5d5a1..d659f45b42 100644 --- 
a/internal/proxy/validate_util.go +++ b/internal/proxy/validate_util.go @@ -130,7 +130,7 @@ func ValidateDimension(dim int64, isBinary bool) error { } func ValidateVectorFieldMetricType(field *schemapb.FieldSchema) error { - if (field.DataType != schemapb.DataType_VECTOR_FLOAT) && (field.DataType != schemapb.DataType_VECTOR_BINARY) { + if field.DataType != schemapb.DataType_VECTOR_FLOAT { return nil } for _, params := range field.IndexParams {