From f67aae9596382d1e18b52a3a2c4d833b45eaac89 Mon Sep 17 00:00:00 2001 From: dragondriver Date: Mon, 28 Dec 2020 16:55:50 +0800 Subject: [PATCH] Fix the codec bug of cgo wrapper of index builder Signed-off-by: dragondriver --- .../core/src/indexbuilder/IndexWrapper.cpp | 22 +++--- internal/core/src/indexbuilder/IndexWrapper.h | 5 +- internal/core/src/indexbuilder/index_c.cpp | 27 ++++++- internal/core/src/indexbuilder/index_c.h | 5 +- internal/core/unittest/test_index_wrapper.cpp | 72 +++++++++++++------ internal/indexbuilder/index.go | 36 +++++++++- internal/indexbuilder/index_test.go | 34 +++++++++ 7 files changed, 165 insertions(+), 36 deletions(-) create mode 100644 internal/indexbuilder/index_test.go diff --git a/internal/core/src/indexbuilder/IndexWrapper.cpp b/internal/core/src/indexbuilder/IndexWrapper.cpp index e8b98cca5a..3ef491cd77 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.cpp +++ b/internal/core/src/indexbuilder/IndexWrapper.cpp @@ -21,9 +21,14 @@ namespace milvus { namespace indexbuilder { -IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* serialized_index_params) { - type_params_ = std::string(serialized_type_params); - index_params_ = std::string(serialized_index_params); +IndexWrapper::IndexWrapper(const char* serialized_type_params, + int64_t type_params_size, + const char* serialized_index_params, + int64_t index_params_size) { + type_params_ = std::string(serialized_type_params, type_params_size); + index_params_ = std::string(serialized_index_params, index_params_size); + // std::cout << "type_params_.size(): " << type_params_.size() << std::endl; + // std::cout << "index_params_.size(): " << index_params_.size() << std::endl; parse(); @@ -35,20 +40,21 @@ IndexWrapper::IndexWrapper(const char* serialized_type_params, const char* seria auto index_mode = mode.has_value() ? mode_map[mode.value()] : knowhere::IndexMode::MODE_CPU; index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, index_mode); + Assert(index_ != nullptr); } void IndexWrapper::parse() { namespace indexcgo = milvus::proto::indexcgo; - bool serialized_success; + bool deserialized_success; indexcgo::TypeParams type_config; - serialized_success = type_config.ParseFromString(type_params_); - Assert(serialized_success); + deserialized_success = type_config.ParseFromString(type_params_); + Assert(deserialized_success); indexcgo::IndexParams index_config; - serialized_success = index_config.ParseFromString(index_params_); - Assert(serialized_success); + deserialized_success = index_config.ParseFromString(index_params_); + Assert(deserialized_success); for (auto i = 0; i < type_config.params_size(); ++i) { auto type_param = type_config.params(i); diff --git a/internal/core/src/indexbuilder/IndexWrapper.h b/internal/core/src/indexbuilder/IndexWrapper.h index 93959c7f0d..6818b6d187 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.h +++ b/internal/core/src/indexbuilder/IndexWrapper.h @@ -18,7 +18,10 @@ namespace indexbuilder { class IndexWrapper { public: - explicit IndexWrapper(const char* serialized_type_params, const char* serialized_index_params); + explicit IndexWrapper(const char* serialized_type_params, + int64_t type_params_size, + const char* serialized_index_params, + int64_t index_params_size); int64_t dim(); diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 6cd3cfbd63..a1d868fdce 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -14,9 +14,32 @@ #include "indexbuilder/IndexWrapper.h" #include "indexbuilder/index_c.h" +class CGODebugUtils { + public: + static int64_t + Strlen(const char* str, int64_t size) { + if (size == 0) { + return size; + } else { + return strlen(str); + } + } +}; + CIndex -CreateIndex(const char* serialized_type_params, const char* serialized_index_params) { - auto index = std::make_unique(serialized_type_params, serialized_index_params); +CreateIndex(const char* serialized_type_params, + int64_t type_params_size, + const char* serialized_index_params, + int64_t index_params_size) { + // std::cout << "strlen(serialized_type_params): " << CGODebugUtils::Strlen(serialized_type_params, + // type_params_size) + // << std::endl; + // std::cout << "type_params_size: " << type_params_size << std::endl; + // std::cout << "strlen(serialized_index_params): " + // << CGODebugUtils::Strlen(serialized_index_params, index_params_size) << std::endl; + // std::cout << "index_params_size: " << index_params_size << std::endl; + auto index = std::make_unique(serialized_type_params, type_params_size, + serialized_index_params, index_params_size); return index.release(); } diff --git a/internal/core/src/indexbuilder/index_c.h b/internal/core/src/indexbuilder/index_c.h index 9eca82d9a0..4ba2dfb255 100644 --- a/internal/core/src/indexbuilder/index_c.h +++ b/internal/core/src/indexbuilder/index_c.h @@ -35,7 +35,10 @@ typedef void* CIndex; // Solution: using protobuf instead of json, this way significantly increase programming efficiency CIndex -CreateIndex(const char* serialized_type_params, const char* serialized_index_params); +CreateIndex(const char* serialized_type_params, + int64_t type_params_size, + const char* serialized_index_params, + int64_t index_params_size); void DeleteIndex(CIndex index); diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index 973034c107..9bf153176c 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -36,7 +36,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) { return milvus::knowhere::Config{ {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, + // {milvus::knowhere::meta::TOPK, K}, {milvus::knowhere::IndexParams::nlist, 100}, // {milvus::knowhere::IndexParams::nprobe, 4}, {milvus::knowhere::IndexParams::m, 4}, @@ -47,7 +47,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { return milvus::knowhere::Config{ {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, + // {milvus::knowhere::meta::TOPK, K}, {milvus::knowhere::IndexParams::nlist, 100}, // {milvus::knowhere::IndexParams::nprobe, 4}, {milvus::knowhere::IndexParams::m, 4}, @@ -58,7 +58,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) { return milvus::knowhere::Config{ {milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, + // {milvus::knowhere::meta::TOPK, K}, {milvus::knowhere::Metric::TYPE, metric_type}, }; } @@ -86,14 +86,14 @@ generate_params(const milvus::knowhere::IndexType& index_type, const milvus::kno } auto -GenDataset(int64_t N, milvus::knowhere::MetricType metric_type, bool is_binary) { +GenDataset(int64_t N, milvus::knowhere::MetricType metric_type, bool is_binary, int64_t dim = DIM) { auto schema = std::make_shared(); auto faiss_metric_type = milvus::knowhere::GetMetricType(metric_type); if (!is_binary) { - schema->AddField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, DIM, faiss_metric_type); + schema->AddField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type); return milvus::segcore::DataGen(schema, N); } else { - schema->AddField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, DIM, faiss_metric_type); + schema->AddField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type); return milvus::segcore::DataGen(schema, N); } } @@ -205,11 +205,39 @@ TEST(PQWrapper, Build) { auto dataset = GenDataset(NB, metric_type, false); auto xb_data = dataset.get_col(0); auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data()); - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); } +TEST(PQCGO, Params) { + std::vector type_params; + std::vector index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109, + 101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10, + 10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95, + 80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110, + 108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52}; + auto index = std::make_unique(type_params.data(), type_params.size(), + index_params.data(), index_params.size()); + + auto dim = index->dim(); + auto dataset = GenDataset(NB, METRIC_TYPE, false, dim); + auto xb_data = dataset.get_col(0); + auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data()); + ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); +} + +TEST(PQCGOWrapper, Params) { + std::vector type_params; + std::vector index_params{10, 10, 10, 5, 110, 98, 105, 116, 115, 18, 1, 56, 10, 17, 10, 11, 109, + 101, 116, 114, 105, 99, 95, 116, 121, 112, 101, 18, 2, 76, 50, 10, 20, 10, + 10, 105, 110, 100, 101, 120, 95, 116, 121, 112, 101, 18, 6, 73, 86, 70, 95, + 80, 81, 10, 8, 10, 3, 100, 105, 109, 18, 1, 56, 10, 12, 10, 5, 110, + 108, 105, 115, 116, 18, 3, 49, 48, 48, 10, 6, 10, 1, 109, 18, 1, 52}; + auto index = CreateIndex(type_params.data(), type_params.size(), index_params.data(), index_params.size()); + DeleteIndex(index); +} + TEST(BinFlatWrapper, Build) { auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT; auto metric_type = milvus::knowhere::Metric::JACCARD; @@ -227,8 +255,8 @@ TEST(BinFlatWrapper, Build) { std::vector ids(NB, 0); std::iota(ids.begin(), ids.end(), 0); auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data()); - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); } @@ -250,8 +278,8 @@ TEST(BinIdMapWrapper, Build) { std::vector ids(NB, 0); std::iota(ids.begin(), ids.end(), 0); auto xb_dataset = milvus::knowhere::GenDatasetWithIds(NB, DIM, xb_data.data(), ids.data()); - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset)); ASSERT_NO_THROW(index->BuildWithIds(xb_dataset)); } @@ -266,20 +294,20 @@ INSTANTIATE_TEST_CASE_P(IndexTypeParameters, milvus::knowhere::Metric::JACCARD))); TEST_P(IndexWrapperTest, Constructor) { - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); } TEST_P(IndexWrapperTest, Dim) { - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); ASSERT_EQ(index->dim(), DIM); } TEST_P(IndexWrapperTest, BuildWithoutIds) { - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); @@ -289,8 +317,8 @@ TEST_P(IndexWrapperTest, BuildWithoutIds) { } TEST_P(IndexWrapperTest, Codec) { - auto index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset)); @@ -300,8 +328,8 @@ TEST_P(IndexWrapperTest, Codec) { } auto binary = index->Serialize(); - auto copy_index = - std::make_unique(type_params_str.c_str(), index_params_str.c_str()); + auto copy_index = std::make_unique( + type_params_str.c_str(), type_params_str.size(), index_params_str.c_str(), index_params_str.size()); ASSERT_NO_THROW(copy_index->Load(binary.data, binary.size)); ASSERT_EQ(copy_index->dim(), copy_index->dim()); auto copy_binary = copy_index->Serialize(); diff --git a/internal/indexbuilder/index.go b/internal/indexbuilder/index.go index 8dc8488f01..fe358ac4e2 100644 --- a/internal/indexbuilder/index.go +++ b/internal/indexbuilder/index.go @@ -129,11 +129,43 @@ func NewCIndex(typeParams, indexParams map[string]string) (Index, error) { return nil, err } + //print := func(param []byte) { + // for i, c := range param { + // fmt.Print(c) + // fmt.Print(", ") + // if i % 25 == 0 { + // fmt.Println() + // } + // } + // fmt.Println() + //} + //print(typeParamsStr) + //fmt.Println("len(typeParamsStr): ", len(typeParamsStr)) + //print(indexParamsStr) + //fmt.Println("len(indexParamsStr): ", len(indexParamsStr)) + + var typeParamsPointer unsafe.Pointer + var indexParamsPointer unsafe.Pointer + + if len(typeParamsStr) > 0 { + typeParamsPointer = unsafe.Pointer(&typeParamsStr[0]) + } else { + typeParamsPointer = nil + } + if len(indexParamsStr) > 0 { + indexParamsPointer = unsafe.Pointer(&indexParamsStr[0]) + } else { + indexParamsPointer = nil + } + /* CIndex - CreateIndex(const char* serialized_type_params, const char* serialized_index_params); + CreateIndex(const char* serialized_type_params, + int64_t type_params_size, + const char* serialized_index_params + int64_t index_params_size); */ return &CIndex{ - indexPtr: C.CreateIndex((*C.char)(unsafe.Pointer(&typeParamsStr[0])), (*C.char)(unsafe.Pointer(&indexParamsStr[0]))), + indexPtr: C.CreateIndex((*C.char)(typeParamsPointer), (C.int64_t)(len(typeParamsStr)), (*C.char)(indexParamsPointer), (C.int64_t)(len(indexParamsStr))), }, nil } diff --git a/internal/indexbuilder/index_test.go b/internal/indexbuilder/index_test.go new file mode 100644 index 0000000000..a5e5eb2ca5 --- /dev/null +++ b/internal/indexbuilder/index_test.go @@ -0,0 +1,34 @@ +package indexbuilder + +import ( + "github.com/stretchr/testify/assert" + "strconv" + "testing" +) + +const ( + indexType = "IVF_PQ" + dim = 8 + nlist = 100 + m = 4 + nbits = 8 + metricType = "L2" +) + +func TestIndex_New(t *testing.T) { + typeParams := make(map[string]string) + indexParams := make(map[string]string) + indexParams["index_type"] = indexType + indexParams["dim"] = strconv.Itoa(dim) + indexParams["nlist"] = strconv.Itoa(nlist) + indexParams["m"] = strconv.Itoa(m) + indexParams["nbits"] = strconv.Itoa(nbits) + indexParams["metric_type"] = metricType + + index, err := NewCIndex(typeParams, indexParams) + assert.Equal(t, err, nil) + assert.NotEqual(t, index, nil) + + err = index.Delete() + assert.Equal(t, err, nil) +}