Fix query not empty after delete string (#17953)

Signed-off-by: xige-16 <xi.ge@zilliz.com>
This commit is contained in:
xige-16 2022-07-04 16:54:20 +08:00 committed by GitHub
parent 39db27865f
commit 0702993106
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 6 deletions

View File

@ -115,13 +115,18 @@ StringIndexMarisa::Load(const BinarySet& set) {
fill_offsets();
}
// Reports whether `str_id` denotes a real key rather than the
// MARISA_INVALID_KEY_ID sentinel (which lookup() returns for a string that is
// not in the trie).
bool
valid_str_id(size_t str_id) {
    // size_t is unsigned, so a `str_id >= 0` test is always true and adds
    // nothing; the sentinel comparison is the only check that matters.
    return str_id != MARISA_INVALID_KEY_ID;
}
const TargetBitmapPtr
StringIndexMarisa::In(size_t n, const std::string* values) {
TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(str_ids_.size());
for (size_t i = 0; i < n; i++) {
auto str = values[i];
auto str_id = lookup(str);
if (str_id >= 0) {
if (valid_str_id(str_id)) {
auto offsets = str_ids_to_offsets_[str_id];
for (auto offset : offsets) {
bitset->set(offset);
@ -138,7 +143,7 @@ StringIndexMarisa::NotIn(size_t n, const std::string* values) {
for (size_t i = 0; i < n; i++) {
auto str = values[i];
auto str_id = lookup(str);
if (str_id >= 0) {
if (valid_str_id(str_id)) {
auto offsets = str_ids_to_offsets_[str_id];
for (auto offset : offsets) {
bitset->reset(offset);
@ -234,7 +239,7 @@ StringIndexMarisa::fill_str_ids(size_t n, const std::string* values) {
for (size_t i = 0; i < n; i++) {
auto str = values[i];
auto str_id = lookup(str);
assert(str_id >= 0);
assert(valid_str_id(str_id));
str_ids_[i] = str_id;
}
}
@ -254,8 +259,12 @@ size_t
StringIndexMarisa::lookup(const std::string& str) {
    // Maps `str` to its key id in trie_, or to MARISA_INVALID_KEY_ID when the
    // trie reports no match for it.
    marisa::Agent agent;
    agent.set_query(str.c_str());
    // Only read the agent's key when the lookup reports a match; returning it
    // unconditionally would hand callers an id for a string that is absent.
    if (trie_.lookup(agent)) {
        return agent.key().id();
    }
    // The string was not found in the trie.
    return MARISA_INVALID_KEY_ID;
}
std::vector<size_t>

View File

@ -208,6 +208,7 @@ TEST_F(StringIndexMarisaTest, Codec) {
str_ds = GenDsFromPB(str_arr);
index->BuildWithDataset(str_ds);
std::vector<std::string> invalid_strings = {std::to_string(nb)};
auto copy_index = milvus::scalar::CreateStringIndexMarisa();
{
@ -221,6 +222,12 @@ TEST_F(StringIndexMarisaTest, Codec) {
ASSERT_TRUE(bitset->any());
}
{
auto bitset = copy_index->In(1, invalid_strings.data());
ASSERT_EQ(bitset->size(), nb);
ASSERT_TRUE(bitset->none());
}
{
auto bitset = copy_index->NotIn(nb, strings.data());
ASSERT_EQ(bitset->size(), nb);

View File

@ -1098,7 +1098,6 @@ class TestDeleteString(TestcaseBase):
collection_w.query(default_string_expr, check_task=CheckTasks.check_query_empty)
collection_w.delete(expr=default_string_expr)
@pytest.mark.xfail(reason="https://github.com/milvus-io/milvus/issues/17924")
@pytest.mark.tags(CaseLabel.L1)
def test_delete_all_index_with_string(self):
"""