Escape braces in JSON embedded in fmt-formatted search errors

  * internal/core/src/common/Utils.h: add EscapeBraces(), which doubles
    every '{' and '}' in a string and copies all other characters.
  * internal/core/src/index/VectorMemIndex.cpp: format the search config
    with a plain "{}" placeholder and pass search_conf.dump() through
    milvus::EscapeBraces(), replacing the old "{{{}}}" pattern.
  * internal/core/src/exec/Driver.cpp: report operator->ToString() instead
    of operator->get_operator_type() in the operator failure message.
  * internal/core/unittest/test_sealed.cpp,
    tests/python_client/testcases/test_search.py: add tests for an HNSW
    search whose ef (10) is smaller than the requested limit (100).

NOTE(review): the scripts/core_build.sh hunk that replaced
"make -j ${jobs} install" with a hard-coded "make -j 7 install" has been
dropped from this patch. It ignores the job count the script already
carries in ${jobs} and is unrelated to the changes above. If capping the
parallelism was intentional, submit it separately.

NOTE(review): this patch was recovered from a copy whose newlines and
indentation had been collapsed, so the whitespace below is reconstructed.
Some lines also appear to have lost "<...>" text (bare "#include" context
lines, "std::make_shared()", "dataset.get_col(fake_id)"). Confirm against
the original commit before applying.

diff --git a/internal/core/src/common/Utils.h b/internal/core/src/common/Utils.h
index 98f99451e4..68f1004f2f 100644
--- a/internal/core/src/common/Utils.h
+++ b/internal/core/src/common/Utils.h
@@ -228,6 +228,22 @@ GetCommonPrefix(const std::string& str1, const std::string& str2) {
     return str1.substr(0, i);
 }
 
+// Escape braces in the input string,
+// used for fmt::format json string
+inline std::string
+EscapeBraces(const std::string& input) {
+    std::string result;
+    for (char ch : input) {
+        if (ch == '{')
+            result += "{{";
+        else if (ch == '}')
+            result += "}}";
+        else
+            result += ch;
+    }
+    return result;
+}
+
 inline knowhere::sparse::SparseRow
 CopyAndWrapSparseRow(const void* data,
                      size_t size,
diff --git a/internal/core/src/exec/Driver.cpp b/internal/core/src/exec/Driver.cpp
index 39ef70d14d..b6b62179a6 100644
--- a/internal/core/src/exec/Driver.cpp
+++ b/internal/core/src/exec/Driver.cpp
@@ -180,7 +180,7 @@ Driver::Next(std::shared_ptr& blocking_state) {
             "Operator::{} failed for [Operator:{}, plan node id: " \
             "{}] : {}", \
             method_name, \
-            operator->get_operator_type(), \
+            operator->ToString(), \
             operator->get_plannode_id(), \
             e.what()); \
         LOG_ERROR(err_msg); \
diff --git a/internal/core/src/index/VectorMemIndex.cpp b/internal/core/src/index/VectorMemIndex.cpp
index d133036718..454efc5f2f 100644
--- a/internal/core/src/index/VectorMemIndex.cpp
+++ b/internal/core/src/index/VectorMemIndex.cpp
@@ -25,6 +25,7 @@
 #include
 #include
 
+#include "common/Common.h"
 #include "common/Tracer.h"
 #include "common/Types.h"
 #include "common/type_c.h"
@@ -439,8 +440,8 @@ VectorMemIndex::Query(const DatasetPtr dataset,
             PanicInfo(
                 ErrorCode::UnexpectedError,
                 // escape json brace in case of using message as format
-                "failed to search: config={{{}}} {}: {}",
-                search_conf.dump(),
+                "failed to search: config={} {}: {}",
+                milvus::EscapeBraces(search_conf.dump()),
                 KnowhereStatusString(res.error()),
                 res.what());
         }
diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp
index 15e7f50b8a..3c01681a04 100644
--- a/internal/core/unittest/test_sealed.cpp
+++ b/internal/core/unittest/test_sealed.cpp
@@ -151,6 +151,88 @@ TEST(Sealed, without_predicate) {
     EXPECT_EQ(sr->get_total_result_count(), 0);
 }
 
+TEST(Sealed, without_search_ef_less_than_limit) {
+    auto schema = std::make_shared();
+    auto dim = 16;
+    auto topK = 5;
+    auto metric_type = knowhere::metric::L2;
+    auto fake_id = schema->AddDebugField(
+        "fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
+    auto float_fid = schema->AddDebugField("age", DataType::FLOAT);
+    auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
+    schema->set_primary_field_id(i64_fid);
+
+    const char* raw_plan = R"(vector_anns: <
+                                field_id: 100
+                                query_info: <
+                                  topk: 100
+                                  round_decimal: 3
+                                  metric_type: "L2"
+                                  search_params: "{\"ef\": 10}"
+                                >
+                                placeholder_tag: "$0"
+     >)";
+
+    auto N = ROW_COUNT;
+
+    auto dataset = DataGen(schema, N);
+    auto vec_col = dataset.get_col(fake_id);
+    auto query_ptr = vec_col.data() + BIAS * dim;
+
+    auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
+    auto plan =
+        CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size());
+    auto num_queries = 5;
+    auto ph_group_raw =
+        CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
+    auto ph_group =
+        ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
+    Timestamp timestamp = 1000000;
+
+    milvus::index::CreateIndexInfo create_index_info;
+    create_index_info.field_type = DataType::VECTOR_FLOAT;
+    create_index_info.metric_type = knowhere::metric::L2;
+    create_index_info.index_type = knowhere::IndexEnum::INDEX_HNSW;
+    create_index_info.index_engine_version =
+        knowhere::Version::GetCurrentVersion().VersionNumber();
+
+    auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
+        create_index_info, milvus::storage::FileManagerContext());
+
+    auto build_conf =
+        knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
+                       {knowhere::indexparam::M, "16"},
+                       {knowhere::indexparam::EF, "10"}};
+
+    auto database = knowhere::GenDataSet(N, dim, vec_col.data());
+    indexing->BuildWithDataset(database, build_conf);
+
+    LoadIndexInfo load_info;
+    load_info.field_id = fake_id.get();
+    load_info.index = std::move(indexing);
+    load_info.index_params["metric_type"] = "L2";
+
+    // load index for vec field, load raw data for scalar field
+    auto sealed_segment = SealedCreator(schema, dataset);
+    sealed_segment->DropFieldData(fake_id);
+    sealed_segment->LoadIndex(load_info);
+
+    // Test that search fails when ef parameter is less than top-k
+    // HNSW index requires ef to be larger than k for proper search
+    bool exception_thrown = false;
+    try {
+        auto sr = sealed_segment->Search(plan.get(), ph_group.get(), timestamp);
+        FAIL() << "Expected exception for invalid ef parameter";
+    } catch (const std::exception& e) {
+        exception_thrown = true;
+        std::string error_msg = e.what();
+        ASSERT_TRUE(error_msg.find("ef(10) should be larger than k(100)") !=
+                    std::string::npos)
+            << "Unexpected error message: " << error_msg;
+    }
+    ASSERT_TRUE(exception_thrown) << "Expected exception was not thrown";
+}
+
 TEST(Sealed, with_predicate) {
     auto schema = std::make_shared();
     auto dim = 16;
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index 4962d9e61d..1bc9fe2c12 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -1249,6 +1249,30 @@ class TestCollectionSearchInvalid(TestcaseBase):
                                          "err_msg": f"metric type {metric} not found or not supported, "
                                                     "supported: [HAMMING JACCARD]"})
 
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_ef_less_than_limit(self):
+        """
+        target: test the scenario which search with ef less than limit
+        method: 1. create collection
+                2. search with ef less than limit
+        expected: raise exception and report the error
+        """
+        collection_w = self.init_collection_general(prefix, True, 2000, 0, is_index=False)[0]
+        index_hnsw = {
+            "index_type": "HNSW",
+            "metric_type": "L2",
+            "params": {"M": 8, "efConstruction" : 256},
+        }
+        collection_w.create_index(ct.default_float_vec_field_name, index_params=index_hnsw)
+        collection_w.flush()
+        collection_w.load()
+        search_params = {"metric_type": "L2", "params": {"ef": 10}}
+        res = collection_w.search(vectors, ct.default_float_vec_field_name,
+                                  search_params, limit=100,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 65535,
+                                               "err_msg": "query failed: N6milvus21ExecOperatorExceptionE :Operator::GetOutput failed"})
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_search_dynamic_compare_two_fields(self):
         """