From 07029931069e6b01925eeb82ebef19978ce0f781 Mon Sep 17 00:00:00 2001 From: xige-16 Date: Mon, 4 Jul 2022 16:54:20 +0800 Subject: [PATCH] Fix query not empty after delete string (#17953) Signed-off-by: xige-16 --- internal/core/src/index/StringIndexMarisa.cpp | 19 ++++++++++++++----- internal/core/unittest/test_string_index.cpp | 7 +++++++ tests/python_client/testcases/test_delete.py | 1 - 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/internal/core/src/index/StringIndexMarisa.cpp b/internal/core/src/index/StringIndexMarisa.cpp index 5b38ea560c..f2934eae84 100644 --- a/internal/core/src/index/StringIndexMarisa.cpp +++ b/internal/core/src/index/StringIndexMarisa.cpp @@ -115,13 +115,18 @@ StringIndexMarisa::Load(const BinarySet& set) { fill_offsets(); } +bool +valid_str_id(size_t str_id) { + return str_id >= 0 && str_id != MARISA_INVALID_KEY_ID; +} + const TargetBitmapPtr StringIndexMarisa::In(size_t n, const std::string* values) { TargetBitmapPtr bitset = std::make_unique(str_ids_.size()); for (size_t i = 0; i < n; i++) { auto str = values[i]; auto str_id = lookup(str); - if (str_id >= 0) { + if (valid_str_id(str_id)) { auto offsets = str_ids_to_offsets_[str_id]; for (auto offset : offsets) { bitset->set(offset); @@ -138,7 +143,7 @@ StringIndexMarisa::NotIn(size_t n, const std::string* values) { for (size_t i = 0; i < n; i++) { auto str = values[i]; auto str_id = lookup(str); - if (str_id >= 0) { + if (valid_str_id(str_id)) { auto offsets = str_ids_to_offsets_[str_id]; for (auto offset : offsets) { bitset->reset(offset); @@ -234,7 +239,7 @@ StringIndexMarisa::fill_str_ids(size_t n, const std::string* values) { for (size_t i = 0; i < n; i++) { auto str = values[i]; auto str_id = lookup(str); - assert(str_id >= 0); + assert(valid_str_id(str_id)); str_ids_[i] = str_id; } } @@ -254,8 +259,12 @@ size_t StringIndexMarisa::lookup(const std::string& str) { marisa::Agent agent; agent.set_query(str.c_str()); - trie_.lookup(agent); - return agent.key().id(); + if (trie_.lookup(agent)) { + return agent.key().id(); + } + + // not found the string in trie + return MARISA_INVALID_KEY_ID; } std::vector diff --git a/internal/core/unittest/test_string_index.cpp b/internal/core/unittest/test_string_index.cpp index 9b20d3f9a5..bd108610fd 100644 --- a/internal/core/unittest/test_string_index.cpp +++ b/internal/core/unittest/test_string_index.cpp @@ -208,6 +208,7 @@ TEST_F(StringIndexMarisaTest, Codec) { str_ds = GenDsFromPB(str_arr); index->BuildWithDataset(str_ds); + std::vector invalid_strings = {std::to_string(nb)}; auto copy_index = milvus::scalar::CreateStringIndexMarisa(); { @@ -221,6 +222,12 @@ TEST_F(StringIndexMarisaTest, Codec) { ASSERT_TRUE(bitset->any()); } + { + auto bitset = copy_index->In(1, invalid_strings.data()); + ASSERT_EQ(bitset->size(), nb); + ASSERT_TRUE(bitset->none()); + } + { auto bitset = copy_index->NotIn(nb, strings.data()); ASSERT_EQ(bitset->size(), nb); diff --git a/tests/python_client/testcases/test_delete.py b/tests/python_client/testcases/test_delete.py index 64400a5ed7..3af6d4034e 100644 --- a/tests/python_client/testcases/test_delete.py +++ b/tests/python_client/testcases/test_delete.py @@ -1098,7 +1098,6 @@ class TestDeleteString(TestcaseBase): collection_w.query(default_string_expr, check_task=CheckTasks.check_query_empty) collection_w.delete(expr=default_string_expr) - @pytest.mark.xfail(reason="https://github.com/milvus-io/milvus/issues/17924") @pytest.mark.tags(CaseLabel.L1) def test_delete_all_index_with_string(self): """