Bingyi Sun 0c0630cc38
feat: support dropping index without releasing collection (#42941)
issue: #42942

This PR includes the following changes (a usage sketch follows the list):
1. Added checks to the index checker in querycoord so that it generates
drop index tasks.
2. Added a drop index interface to querynode.
3. To avoid search failures after dropping an index, querynode allows
lazy mode (warmup=disable) to be used to load raw data even when the
index itself contains the raw data.
4. In segcore, loading an index no longer deletes the raw data; instead,
it evicts it.
5. In expr, the index is pinned to prevent concurrency errors.
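
A minimal usage sketch of the flow this enables (collection/index names, the URI,
and the 128-dim query vector are placeholders; it assumes a deployment that includes
this change and that raw data stays available on the querynode, e.g. via the
lazy/warmup=disable load described in item 3):

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")   # assumed local deployment
    client.load_collection("demo_collection")             # the collection stays loaded

    # Previously the collection had to be released before a vector index could be
    # dropped; with this change the index is dropped in place and searches fall
    # back to the raw vectors retained by the querynode.
    client.drop_index("demo_collection", index_name="vec_index")

    results = client.search("demo_collection", data=[[0.1] * 128], limit=5)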

---------

Signed-off-by: sunby <sunbingyi1992@gmail.com>
2025-09-02 16:17:52 +08:00

3092 lines
145 KiB
Python

import random
from time import sleep
import numpy as np
import pytest
import copy
from base.client_base import TestcaseBase
from base.index_wrapper import ApiIndexWrapper
from base.collection_wrapper import ApiCollectionWrapper
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from common.code_mapping import CollectionErrorMessage as clem
from common.code_mapping import IndexErrorMessage as iem
from common.common_params import (
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, AlterIndexParams
)
from utils.util_pymilvus import *
from common.constants import *
from pymilvus.exceptions import MilvusException
from pymilvus import DataType
prefix = "index"
default_schema = cf.gen_default_collection_schema()
default_field_name = ct.default_float_vec_field_name
default_index_params = ct.default_index
default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "COSINE"}
default_sparse_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "IP"}
# copied from pymilvus
uid = "test_index"
# BUILD_TIMEOUT = 300
field_name = default_float_vec_field_name
binary_field_name = default_binary_vec_field_name
default_string_field_name = ct.default_string_field_name
index_name1 = cf.gen_unique_str("float")
index_name2 = cf.gen_unique_str("varhar")
index_name3 = cf.gen_unique_str("binary")
default_string_index_params = {}
default_binary_schema = cf.gen_default_binary_collection_schema()
default_binary_index_params = ct.default_binary_index
# query = gen_search_vectors_params(field_name, default_entities, default_top_k, 1)
default_ivf_flat_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"}
default_ip_index_params = {"index_type": "IVF_FLAT", "metric_type": "IP", "params": {"nlist": 64}}
default_nq = ct.default_nq
default_limit = ct.default_limit
default_search_exp = "int64 >= 0"
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_search_ip_params = ct.default_search_ip_params
default_search_binary_params = ct.default_search_binary_params
default_nb = ct.default_nb
@pytest.mark.tags(CaseLabel.GPU)
class TestIndexParams(TestcaseBase):
""" Test case of index interface """
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("collection", [None, "coll"])
def test_index_non_collection(self, collection):
"""
target: test index with None collection
method: input none collection object
expected: raise exception
"""
self._connect()
self.index_wrap.init_index(collection, default_field_name, default_index_params, check_task=CheckTasks.err_res,
check_items={ct.err_code: 0, ct.err_msg: clem.CollectionType})
@pytest.mark.tags(CaseLabel.L1)
def test_index_field_name_not_existed(self):
"""
target: test index on non_existing field
method: input field name
expected: raise exception
"""
collection_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=collection_name)
fieldname = "non_existing"
self.index_wrap.init_index(collection_w.collection, fieldname, default_index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: f"cannot create index on non-exist field: {fieldname}"})
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("index_type", ["non_exiting_type", 100])
def test_index_type_invalid(self, index_type):
"""
target: test index with error index type
method: input invalid index type
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
index_params = copy.deepcopy(default_index_params)
index_params["index_type"] = index_type
self.index_wrap.init_index(collection_w.collection, default_field_name, index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100, ct.err_msg: "invalid parameter["})
@pytest.mark.tags(CaseLabel.L1)
def test_index_type_not_supported(self):
"""
target: test index with error index type
method: input unsupported index type
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
index_params = copy.deepcopy(default_index_params)
index_params["index_type"] = "IVFFFFFFF"
self.index_wrap.init_index(collection_w.collection, default_field_name, index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: ""})
@pytest.mark.tags(CaseLabel.L1)
def test_index_params_invalid(self, get_invalid_index_params):
"""
target: test index with error index params
method: input invalid index params
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
index_params = get_invalid_index_params
self.index_wrap.init_index(collection_w.collection, default_field_name, index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1, ct.err_msg: ""})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index_name", ["_1ndeX", "In_t0"])
def test_index_naming_rules(self, index_name):
"""
target: test index naming rules
method: 1. connect milvus
2. Create a collection
3. Create an index with an index_name which uses all the supported elements in the naming rules
expected: Index create successfully
"""
self._connect()
collection_w = self.init_collection_wrap()
collection_w.create_index(default_field_name, default_index_params, index_name=index_name)
assert len(collection_w.indexes) == 1
assert collection_w.indexes[0].index_name == index_name
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index_name", ["_1ndeX", "In_0"])
def test_index_same_index_name_two_fields(self, index_name):
"""
target: test index naming rules
method: 1. connect milvus
2. Create a collection with more than 3 fields
3. Create two indexes on two fields with the same index name
expected: raise exception
"""
self._connect()
collection_w = self.init_collection_wrap()
self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params,
index_name=index_name)
self.index_wrap.init_index(collection_w.collection, ct.default_int64_field_name, default_index_params,
index_name=index_name,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "invalid parameter"})
@pytest.mark.tags(CaseLabel.L1)
# @pytest.mark.xfail(reason="issue 19181")
@pytest.mark.parametrize("get_invalid_index_name", ["1nDex", "$in4t", "12 s", "(中文)"])
def test_index_name_invalid(self, get_invalid_index_name):
"""
target: test index with error index name
method: input invalid index name
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
index_name = get_invalid_index_name
collection_w = self.init_collection_wrap(name=c_name)
self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params,
index_name=get_invalid_index_name,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "Invalid index name"})
@pytest.mark.tags(CaseLabel.GPU)
class TestIndexOperation(TestcaseBase):
""" Test case of index interface """
@pytest.mark.tags(CaseLabel.L1)
def test_index_create_with_different_indexes(self):
"""
        target: test create index on one field, with two different types of index
        method: create two different indexes on the same field
        expected: the second create_index call fails and only the first index is kept
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params)
error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one "
"distinct index is allowed per field"}
self.index_wrap.init_index(collection_w.collection, default_field_name, default_ivf_flat_index,
check_task=CheckTasks.err_res, check_items=error)
assert len(collection_w.indexes) == 1
assert collection_w.indexes[0].params["index_type"] == default_index_params["index_type"]
@pytest.mark.tags(CaseLabel.L1)
def test_index_create_indexes_for_different_fields(self):
"""
target: Test create indexes for different fields
method: create two different indexes with default index name
        expected: create successfully, and the default index name equals the field name
"""
collection_w = self.init_collection_general(prefix, True, nb=200, is_index=False)[0]
default_index = ct.default_index
collection_w.create_index(default_field_name, default_index)
collection_w.create_index(ct.default_int64_field_name, {})
assert len(collection_w.indexes) == 2
for index in collection_w.indexes:
assert index.field_name == index.index_name
@pytest.mark.tags(CaseLabel.L1)
def test_index_create_on_scalar_field(self):
"""
target: Test create index on scalar field
method: create index on scalar field and load
expected: raise exception
"""
collection_w = self.init_collection_general(prefix, True, nb=200, is_index=False)[0]
collection_w.create_index(ct.default_int64_field_name, {})
collection_w.load(check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "there is no vector index on field: [float_vector], "
"please create index firstly"})
@pytest.mark.tags(CaseLabel.L2)
def test_index_create_on_array_field(self):
"""
target: Test create index on array field
method: create index on array field
        expected: create index successfully with default params
"""
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
collection_w.create_index(ct.default_string_array_field_name, {})
assert collection_w.index()[0].params == {}
@pytest.mark.tags(CaseLabel.L1)
def test_index_collection_empty(self):
"""
target: test index with empty collection
method: Index on empty collection
expected: no exception raised
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
index, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params)
cf.assert_equal_index(index, collection_w.collection.indexes[0])
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index_param", [default_index_params])
def test_index_params(self, index_param):
"""
target: test index with all index type/params
method: input valid params
expected: no exception raised
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
index_params = index_param
index, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, index_params)
cf.assert_equal_index(index, collection_w.collection.indexes[0])
@pytest.mark.tags(CaseLabel.L1)
def test_index_params_flush(self):
"""
target: test index with all index type/params
method: input valid params
expected: no exception raised
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
# flush
collection_w.num_entities
index, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params)
# TODO: assert index
cf.assert_equal_index(index, collection_w.collection.indexes[0])
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
def test_index_name_dup(self):
"""
target: test index with duplicate index name
        method: create index with an existing index name and different index params
expected: raise exception
create index with the same index name and same index params
expected: no exception raised
"""
c_name = cf.gen_unique_str(prefix)
index_name = ct.default_index_name
collection_w = self.init_collection_wrap(name=c_name)
params = cf.get_index_params_params("HNSW")
index_params = {"index_type": "HNSW", "metric_type": "L2", "params": params}
params2 = cf.get_index_params_params("HNSW")
params2.update({"M": 16, "efConstruction": 200})
index_params2 = {"index_type": "HNSW", "metric_type": "L2", "params": params2}
collection_w.collection.create_index(default_field_name, index_params, index_name=index_name)
# create index with the same index name and different index params
error = {ct.err_code: 999, ct.err_msg: "at most one distinct index is allowed per field"}
self.index_wrap.init_index(collection_w.collection, default_field_name, index_params2, index_name=index_name,
check_task=CheckTasks.err_res,
check_items=error)
# create index with the same index name and same index params
self.index_wrap.init_index(collection_w.collection, default_field_name, index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_index_same_name_on_diff_fields(self):
"""
target: verify index with the same name on different fields is not supported
method: create index with index name A on fieldA, create index with index name A on fieldB
expected: raise exception
"""
# collection_w, _ = self.init_collection_general(prefix, dim=64, insert_data=False, is_index=False,
# multiple_dim_array=[32])
id_field = cf.gen_int64_field(name="id", is_primary=True)
vec_field = cf.gen_float_vec_field(name="vec_field", dim=64)
vec_field2 = cf.gen_float_vec_field(name="vec_field2", dim=32)
str_field = cf.gen_string_field(name="str_field")
str_field2 = cf.gen_string_field(name="str_field2")
schema, _ = self.collection_schema_wrap.init_collection_schema(
[id_field, vec_field, vec_field2, str_field, str_field2])
collection_w = self.init_collection_wrap(schema=schema)
vec_index = ct.default_index
vec_index_name = "my_index"
# create same index name on different vector fields
error = {ct.err_code: 999, ct.err_msg: "at most one distinct index is allowed per field"}
collection_w.create_index(vec_field.name, vec_index, index_name=vec_index_name)
collection_w.create_index(vec_field2.name, vec_index, index_name=vec_index_name,
check_task=CheckTasks.err_res,
check_items=error)
# create same index name on different scalar fields
collection_w.create_index(str_field.name, index_name=vec_index_name,
check_task=CheckTasks.err_res,
check_items=error)
        # create the same scalar index name on different scalar fields
index_name = "scalar_index"
collection_w.create_index(str_field.name, index_name=index_name)
collection_w.create_index(str_field2.name, index_name=index_name,
check_task=CheckTasks.err_res,
check_items=error)
all_indexes = collection_w.indexes
assert len(all_indexes) == 2
assert all_indexes[0].index_name != all_indexes[1].index_name
for index in all_indexes:
assert index.index_name in [vec_index_name, index_name]
@pytest.mark.tags(CaseLabel.L1)
def test_index_drop_index(self):
"""
target: test index.drop
method: create index by `index`, and then drop it
expected: no exception raised
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
index, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params)
cf.assert_equal_index(index, collection_w.collection.indexes[0])
self.index_wrap.drop()
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_index_drop_repeatedly(self):
"""
target: test index.drop
        method: create index by `index`, and then drop it twice
        expected: no exception raised
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
_, _ = self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params)
self.index_wrap.drop()
self.index_wrap.drop()
@pytest.mark.tags(CaseLabel.GPU)
class TestIndexAdvanced(TestcaseBase):
""" Test case of index interface """
@pytest.mark.tags(CaseLabel.L2)
def test_index_drop_multi_collections(self):
"""
target: test index.drop
        method: create indexes on two collections, drop the index of one collection
        expected: no exception raised, and the index on the other collection is left intact
"""
c_name = cf.gen_unique_str(prefix)
c_name_2 = cf.gen_unique_str(prefix)
cw = self.init_collection_wrap(name=c_name)
cw2 = self.init_collection_wrap(name=c_name_2)
iw_2 = ApiIndexWrapper()
self.index_wrap.init_index(cw.collection, default_field_name, default_index_params)
index_2, _ = iw_2.init_index(cw2.collection, default_field_name, default_index_params)
self.index_wrap.drop()
assert cf.assert_equal_index(index_2, cw2.collection.indexes[0])
assert len(cw.collection.indexes) == 0
@pytest.mark.tags(CaseLabel.GPU)
class TestNewIndexBase(TestcaseBase):
"""
******************************************************************
The following cases are used to test `create_index` function
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("index_type", ct.all_index_types[0:8])
def test_create_index_default(self, index_type):
"""
target: test create index interface
method: create collection and add entities in it, create index
expected: return search success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, shards_num=1)
data = cf.gen_default_list_data(nb=1500)
collection_w.insert(data=data)
params = cf.get_index_params_params(index_type)
index_params = {"index_type": index_type, "metric_type": "L2", "params": params}
collection_w.create_index(ct.default_float_vec_field_name, index_params=index_params,
index_name=ct.default_index_name)
assert len(collection_w.indexes) == 1
collection_w.drop_index(index_name=ct.default_index_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_non_existed_field(self):
"""
target: test create index interface
method: create collection and add entities in it, create index on other field
expected: error raised
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
collection_w.create_index(ct.default_int8_field_name, default_index_params,
index_name=ct.default_index_name,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "cannot create index on non-exist field: int8"}
)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_partition(self):
"""
target: test create index interface
method: create collection, create partition, and add entities in it, create index
expected: return search success
"""
collection_w = self.init_collection_wrap()
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
data = cf.gen_default_list_data()
ins_res, _ = partition_w.insert(data)
assert len(ins_res.primary_keys) == len(data[0])
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_partition_flush(self):
"""
target: test create index interface
method: create collection, create partition, and add entities in it, create index
expected: return search success
"""
collection_w = self.init_collection_wrap()
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
data = cf.gen_default_list_data(default_nb)
partition_w.insert(data)
assert collection_w.num_entities == default_nb
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_without_connect(self):
"""
target: test create index without connection
        method: create collection and add entities in it, remove the connection, then create index
expected: raise exception
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
self.connection_wrap.remove_connection(ct.default_alias)
res_list, _ = self.connection_wrap.list_connections()
assert ct.default_alias not in res_list
collection_w.create_index(ct.default_float_vec_field_name, ct.default_index,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: "should create connection first"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_search_with_query_vectors(self):
"""
target: test create index interface, search with more query vectors
method: create collection and add entities in it, create index
expected: return search success
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_multithread(self):
"""
        target: test create index interface with multiple threads
method: create collection and add entities in it, create index
expected: return search success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
def build(collection_w):
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
threads_num = 8
threads = []
for i in range(threads_num):
t = MyThread(target=build, args=(collection_w,))
threads.append(t)
t.start()
time.sleep(0.2)
for t in threads:
t.join()
@pytest.mark.tags(CaseLabel.L1)
def test_create_same_index_repeatedly(self):
"""
target: check if index can be created repeatedly, with the same create_index params
        method: create index again after the index has been built
expected: return code success, and search ok
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_different_name(self):
"""
        target: check that a second index with a different name cannot be created on the same field
        method: create another index with a different index name after the index has been built
expected: raise error
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="a")
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="b",
check_task=CheckTasks.err_res,
check_items={ct.err_code: 999,
ct.err_msg: "CreateIndex failed: creating multiple indexes on same field is not supported"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_repeatedly_new(self):
"""
        target: check if index can be created repeatedly, with different create_index params
        method: create another index with different index_params after the index has been built
expected: drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
        index_params_list = [default_ivf_flat_index,
                             {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
        for index in index_params_list:
index_name = cf.gen_unique_str("name")
collection_w.create_index(default_float_vec_field_name, index, index_name=index_name)
collection_w.load()
collection_w.release()
collection_w.drop_index(index_name=index_name)
assert len(collection_w.collection.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_ip(self):
"""
target: test create index interface
method: create collection and add entities in it, create index
expected: return search success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_partition_ip(self):
"""
target: test create index interface
method: create collection, create partition, and add entities in it, create index
expected: return search success
"""
collection_w = self.init_collection_wrap()
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
data = cf.gen_default_list_data(default_nb)
ins_res, _ = partition_w.insert(data)
assert len(ins_res.primary_keys) == len(data[0])
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
@pytest.mark.tags(CaseLabel.L2)
def test_create_index_partition_flush_ip(self):
"""
target: test create index
method: create collection and create index, add entities in it
expected: create index ok, and count correct
"""
collection_w = self.init_collection_wrap()
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
data = cf.gen_default_list_data(default_nb)
partition_w.insert(data)
assert collection_w.num_entities == default_nb
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_search_with_query_vectors_ip(self):
"""
target: test create index interface, search with more query vectors
method: create collection and add entities in it, create index
expected: return search success
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_ip_params, default_limit,
default_search_exp)
@pytest.mark.tags(CaseLabel.L2)
def test_create_index_multithread_ip(self):
"""
        target: test create index interface with multiple threads
method: create collection and add entities in it, create index
expected: return success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
def build(collection_w):
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
threads_num = 8
threads = []
for i in range(threads_num):
t = MyThread(target=build, args=(collection_w,))
threads.append(t)
t.start()
time.sleep(0.2)
for t in threads:
t.join()
@pytest.mark.tags(CaseLabel.L2)
def test_create_index_no_vectors_insert_ip(self):
"""
        target: test create index interface when there are no vectors in the collection,
                and that it does not affect the subsequent process
        method: create collection with no vectors in it, then create index and
                add entities in it
        expected: return insert success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
@pytest.mark.tags(CaseLabel.L2)
def test_create_same_index_repeatedly_ip(self):
"""
target: check if index can be created repeatedly, with the same create_index params
        method: create index again after the index has been built
expected: return code success, and search ok
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
collection_w.create_index(ct.default_float_vec_field_name, default_ip_index_params)
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L0)
def test_create_different_index_repeatedly_ip(self):
"""
        target: check if index can be created repeatedly, with different create_index params
        method: create another index with different index_params after the index has been built
expected: drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
        index_params_list = [default_ip_index_params,
                             {"metric_type": "IP", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
        for index in index_params_list:
index_name = cf.gen_unique_str("name")
collection_w.create_index(default_float_vec_field_name, index, index_name=index_name)
collection_w.release()
collection_w.drop_index(index_name=index_name)
assert len(collection_w.collection.indexes) == 0
"""
******************************************************************
The following cases are used to test `drop_index` function
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_drop_index_repeatedly(self):
"""
target: test drop index repeatedly
method: create index, call drop index, and drop again
expected: return code 0
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
params = cf.get_index_params_params("HNSW")
index_params = {"index_type": "HNSW", "metric_type": "L2", "params": params}
collection_w.create_index(ct.default_float_vec_field_name, index_params,
index_name=ct.default_index_name)
assert len(collection_w.indexes) == 1
collection_w.drop_index(index_name=ct.default_index_name)
assert len(collection_w.indexes) == 0
collection_w.drop_index(index_name=ct.default_index_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L2)
def test_drop_index_without_connect(self):
"""
target: test drop index without connection
        method: remove the connection, then drop the index
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
self.connection_wrap.remove_connection(ct.default_alias)
collection_w.drop_index(index_name=ct.default_index_name, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: "should create connection first."})
@pytest.mark.tags(CaseLabel.L2)
def test_create_drop_index_repeatedly(self):
"""
target: test create / drop index repeatedly, use the same index params
method: create index, drop index, four times
expected: return code 0
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
params = cf.get_index_params_params("HNSW")
index_params = {"index_type": "HNSW", "metric_type": "L2", "params": params}
for i in range(4):
collection_w.create_index(ct.default_float_vec_field_name, index_params,
index_name=ct.default_index_name)
assert len(collection_w.indexes) == 1
collection_w.drop_index(index_name=ct.default_index_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L0)
def test_create_PQ_without_nbits(self):
"""
target: test create PQ index
method: create PQ index without nbits
expected: create successfully
"""
PQ_index = {"index_type": "IVF_PQ", "params": {"nlist": 128, "m": 16}, "metric_type": "L2"}
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
collection_w.create_index(ct.default_float_vec_field_name, PQ_index, index_name=ct.default_index_name)
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L2)
def test_drop_index_collection_not_create_ip(self):
"""
        target: test drop index interface when the index has not been created
        method: create collection and add entities in it, then drop index without creating one
        expected: no exception raised, and the collection still has no index
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
collection_w.drop_index(index_name=default_field_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_index_collection_with_after_load(self):
"""
target: Test that index files are not lost after loading
method: create collection and add entities in it, create index, flush and load
expected: load and search successfully
"""
collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix))
nums = 5
tmp_nb = 1000
for i in range(nums):
df = cf.gen_default_dataframe_data(nb=tmp_nb, start=i * tmp_nb)
insert_res, _ = collection_w.insert(df)
assert collection_w.num_entities == (i + 1) * tmp_nb
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
search_res, _ = collection_w.search(vectors, default_search_field, default_search_params, default_limit)
assert len(search_res[0]) == ct.default_limit
@pytest.mark.tags(CaseLabel.L1)
def test_turn_off_index_mmap(self):
"""
target: disabling and re-enabling mmap for index
method: disabling and re-enabling mmap for index
expected: search success
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.load()
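        # altering the index mmap setting requires the collection to be released first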
collection_w.release()
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': False})
collection_w.load()
assert collection_w.index()[0].params["mmap.enabled"] == 'False'
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp)
collection_w.release()
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.load()
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[:7])
def test_drop_mmap_index(self, index):
"""
        target: drop an index with mmap enabled and rebuild it
        method: enable mmap on the index, drop it, then create the index again and load
        expected: search success
"""
self._connect()
collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False)[0]
params = cf.get_index_params_params(index)
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
collection_w.create_index(field_name, default_index, index_name=f"mmap_index_{index}")
collection_w.alter_index(f"mmap_index_{index}", {'mmap.enabled': True})
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.drop_index(index_name=f"mmap_index_{index}")
collection_w.create_index(field_name, default_index, index_name=f"index_{index}")
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
def test_rebuild_mmap_index(self):
"""
        target: rebuild an index after altering its mmap property
        method: enable mmap via alter index, then rebuild the index
        expected: indexes build normally and the index keeps the mmap information
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_general(c_name, insert_data=True, is_index=False)[0]
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
collection_w.set_properties({'mmap.enabled': True})
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.insert(cf.gen_default_list_data())
collection_w.flush()
# check if mmap works after rebuild index
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.load()
collection_w.release()
# check if mmap works after reloading and rebuilding index.
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.GPU)
class TestNewIndexBinary(TestcaseBase):
"""
******************************************************************
The following cases are used to test `create_index` function
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_create_binary_index_on_scalar_field(self):
"""
target: test create index interface
method: create collection and add entities in it, create index
expected: return search success
"""
collection_w = self.init_collection_general(prefix, True, is_binary=True, is_index=False)[0]
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=binary_field_name)
assert collection_w.has_index(index_name=binary_field_name)[0] is True
@pytest.mark.tags(CaseLabel.L0)
def test_create_index_partition(self):
"""
target: test create index interface
method: create collection, create partition, and add entities in it, create index
expected: return search success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
df, _ = cf.gen_default_binary_dataframe_data()
ins_res, _ = partition_w.insert(df)
assert len(ins_res.primary_keys) == len(df)
collection_w.create_index(default_binary_vec_field_name, default_binary_index_params,
index_name=binary_field_name)
assert collection_w.has_index(index_name=binary_field_name)[0] is True
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L0)
def test_create_index_search_with_query_vectors(self):
"""
target: test create index interface, search with more query vectors
method: create collection and add entities in it, create index
expected: return search success
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data()
collection_w.insert(data=df)
collection_w.create_index(default_binary_vec_field_name, default_binary_index_params,
index_name=binary_field_name)
collection_w.load()
_, vectors = cf.gen_binary_vectors(default_nq, default_dim)
collection_w.search(vectors[:default_nq], binary_field_name,
default_search_binary_params, default_limit,
default_search_exp)
@pytest.mark.tags(CaseLabel.L2)
def test_create_index_invalid_metric_type_binary(self):
"""
target: test create index interface with invalid metric type
method: add entities into binary collection, flush, create index with L2 metric type.
expected: return create_index failure
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
binary_index_params = {'index_type': 'BIN_IVF_FLAT', 'metric_type': 'L2', 'params': {'nlist': 64}}
collection_w.create_index(default_binary_vec_field_name, binary_index_params,
index_name=binary_field_name, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999,
ct.err_msg: "binary vector index does not support metric type: L2"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE", "JACCARD", "HAMMING"])
def test_create_binary_index_HNSW(self, metric_type):
"""
        target: test create HNSW index on a binary vector field
        method: create an HNSW index on a binary vector field with various metric types
        expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
binary_index_params = {'index_type': 'HNSW', "M": '18', "efConstruction": '240', 'metric_type': metric_type}
error = {ct.err_code: 999, ct.err_msg: f"binary vector index does not support metric type: {metric_type}"}
if metric_type in ["JACCARD", "HAMMING"]:
error = {ct.err_code: 999, ct.err_msg: f"data type BinaryVector can't build with this index HNSW"}
collection_w.create_index(default_binary_vec_field_name, binary_index_params,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric", ct.binary_metrics)
def test_create_binary_index_all_metrics(self, metric):
"""
target: test create binary index using all supported metrics
        method: create binary index using all supported metrics
expected: succeed
"""
collection_w = self.init_collection_general(prefix, True, is_binary=True, is_index=False)[0]
binary_index_params = {"index_type": "BIN_FLAT", "metric_type": metric, "params": {"nlist": 64}}
collection_w.create_index(binary_field_name, binary_index_params)
assert collection_w.has_index()[0] is True
"""
******************************************************************
The following cases are used to test `drop_index` function
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_drop_index(self):
"""
target: test drop index interface
method: create collection and add entities in it, create index, call drop index
expected: return code 0, and default index param
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data()
collection_w.insert(data=df)
collection_w.create_index(default_binary_vec_field_name, default_binary_index_params,
index_name=binary_field_name)
assert len(collection_w.indexes) == 1
collection_w.drop_index(index_name=binary_field_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L0)
def test_drop_index_partition(self):
"""
target: test drop index interface
method: create collection, create partition and add entities in it,
create index on collection, call drop collection index
expected: return code 0, and default index param
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
df, _ = cf.gen_default_binary_dataframe_data()
ins_res, _ = partition_w.insert(df)
assert len(ins_res.primary_keys) == len(df)
collection_w.create_index(default_binary_vec_field_name, default_binary_index_params,
index_name=binary_field_name)
assert collection_w.has_index(index_name=binary_field_name)[0] is True
assert len(collection_w.indexes) == 1
collection_w.drop_index(index_name=binary_field_name)
assert collection_w.has_index(index_name=binary_field_name)[0] is False
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.GPU)
class TestIndexInvalid(TestcaseBase):
"""
Test create / describe / drop index interfaces with invalid collection names
"""
@pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED", IndexName.BITMAP])
def scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=ct.all_dense_vector_types)
def vector_data_type(self, request):
yield request.param
@pytest.fixture(scope="function", params=ct.invalid_resource_names)
def invalid_index_name(self, request):
if request.param in [None, "", " "]:
pytest.skip("None and empty is valid for there is a default index name")
yield request.param
@pytest.mark.tags(CaseLabel.L0)
def test_index_with_invalid_index_name(self, invalid_index_name):
"""
target: test create index interface for invalid scenario
method:
        1. create index with an invalid index name
expected: raise exception
2. drop index with an invalid index name
expected: succeed
"""
collection_w = self.init_collection_wrap()
error = {ct.err_code: 999, ct.err_msg: f"Invalid index name: {invalid_index_name}"}
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=invalid_index_name,
check_task=CheckTasks.err_res, check_items=error)
# drop index with an invalid index name
collection_w.drop_index(index_name=invalid_index_name)
@pytest.mark.tags(CaseLabel.L1)
def test_drop_index_without_release(self):
"""
target: test drop index after load without release
method: 1. create a collection and build an index then load
2. drop the index
expected: raise exception
"""
collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0]
collection_w.create_index(ct.default_float_vec_field_name, ct.default_index)
collection_w.load()
collection_w.drop_index(check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100,
ct.err_msg: "vector index cannot be dropped on loaded collection"})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("n_trees", [-1, 1025, 'a'])
def test_annoy_index_with_invalid_params(self, n_trees):
"""
target: test create index with invalid params
method: 1. set annoy index param n_trees out of range [1, 1024]
2. set annoy index param n_trees type invalid(not int)
expected: raise exception
"""
collection_w = self.init_collection_general(prefix, True, nb=100, is_index=False)[0]
index_annoy = {"index_type": "ANNOY", "params": {"n_trees": n_trees}, "metric_type": "L2"}
collection_w.create_index("float_vector", index_annoy,
check_task=CheckTasks.err_res,
check_items={"err_code": 1100,
"err_msg": "invalid index type: ANNOY"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_scalar_index_on_vector_field(self, scalar_index, vector_data_type):
"""
target: test create scalar index on vector field
        method: 1. create collection, and create index
expected: Raise exception
"""
collection_w = self.init_collection_general(prefix, True, nb=100,
is_index=False, vector_data_type=vector_data_type)[0]
scalar_index_params = {"index_type": scalar_index}
collection_w.create_index(ct.default_float_vec_field_name, index_params=scalar_index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100,
ct.err_msg: f"invalid index params"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_scalar_index_on_binary_vector_field(self, scalar_index):
"""
target: test create scalar index on binary vector field
        method: 1. create collection, and create index
expected: Raise exception
"""
collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0]
scalar_index_params = {"index_type": scalar_index}
collection_w.create_index(ct.default_binary_vec_field_name, index_params=scalar_index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100,
ct.err_msg: "metric type not set for vector index"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_inverted_index_on_json_field(self, vector_data_type):
"""
target: test create scalar index on json field
        method: 1. create collection, and create index
expected: success
"""
collection_w = self.init_collection_general(prefix, is_index=False, vector_data_type=vector_data_type)[0]
scalar_index_params = {"index_type": "INVERTED", "json_cast_type": "double", "json_path": ct.default_json_field_name+"['a']"}
collection_w.create_index(ct.default_json_field_name, index_params=scalar_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_inverted_index_on_array_field(self):
"""
        target: test create inverted index on array field
        method: 1. create collection, and create index on the array field
        expected: success
"""
# 1. create a collection
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
# 2. create index
scalar_index_params = {"index_type": "INVERTED"}
collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_inverted_index_no_vector_index(self):
"""
        target: test loading when only a scalar index exists and there is no vector index
        method: 1. create collection, create a scalar index, then load
        expected: raise exception on load
"""
# 1. create a collection
collection_w = self.init_collection_general(prefix, is_index=False)[0]
# 2. create index
scalar_index_params = {"index_type": "INVERTED"}
collection_w.create_index(ct.default_float_field_name, index_params=scalar_index_params)
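        # loading fails because no vector index has been created on the vector field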
collection_w.load(check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "there is no vector index on field: [float_vector], "
"please create index firstly"})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("scalar_index", ["STL_SORT", "INVERTED"])
def test_create_inverted_index_no_all_vector_index(self, scalar_index):
"""
        target: test loading when not all vector fields have an index
        method: 1. create collection with multiple vector fields, index only some of them, then load
        expected: raise exception on load
"""
# 1. create a collection
multiple_dim_array = [ct.default_dim, ct.default_dim]
collection_w = self.init_collection_general(prefix, is_index=False, multiple_dim_array=multiple_dim_array)[0]
# 2. create index
scalar_index_params = {"index_type": scalar_index}
collection_w.create_index(ct.default_float_field_name, index_params=scalar_index_params)
vector_name_list = cf.extract_vector_field_name_list(collection_w)
flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"}
collection_w.create_index(ct.default_float_vec_field_name, flat_index)
collection_w.load(check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: f"there is no vector index on field: "
f"[{vector_name_list[0]} {vector_name_list[1]}], "
f"please create index firstly"})
@pytest.mark.tags(CaseLabel.L1)
def test_set_non_exist_index_mmap(self):
"""
target: enabling mmap for non-existent indexes
method: enabling mmap for non-existent indexes
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
index_name=ct.default_index_name)
collection_w.alter_index("random_index_345", {'mmap.enabled': True},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: f"index not found"})
@pytest.mark.tags(CaseLabel.L1)
def test_load_mmap_index(self):
"""
target: after loading, enable mmap for the index
method: 1. data preparation and create index
2. load collection
3. enable mmap on index
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
index_name=ct.default_index_name)
collection_w.load()
collection_w.alter_index(binary_field_name, {'mmap.enabled': True},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 104,
ct.err_msg: f"can't alter index on loaded collection"})
@pytest.mark.tags(CaseLabel.L2)
def test_turning_on_mmap_for_scalar_index(self):
"""
target: turn on mmap for scalar indexes
method: turn on mmap for scalar indexes
expected: raise exception
"""
collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
        # Trie supports mmap now, so only STL_SORT is expected to reject it
scalar_index = ["STL_SORT"]
scalar_fields = [ct.default_int16_field_name]
for i in range(len(scalar_fields)):
index_name = f"scalar_index_name_{i}"
scalar_index_params = {"index_type": f"{scalar_index[i]}"}
collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
collection_w.alter_index(index_name, {'mmap.enabled': True},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: f"index type {scalar_index[i]} does not support mmap"})
collection_w.drop_index(index_name)
@pytest.mark.tags(CaseLabel.L2)
def test_alter_index_invalid(self):
"""
        target: test alter index with invalid parameters
        method: alter index with invalid property values, keys, and types
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': "error_value"},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: f"invalid mmap.enabled value"})
collection_w.alter_index(ct.default_index_name, {"error_param_key": 123},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100,
ct.err_msg: "error_param_key is not a configable index property:"})
collection_w.alter_index(ct.default_index_name, ["error_param_type"],
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: f"Unexpected error"})
collection_w.alter_index(ct.default_index_name, None,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "properties should not be None"})
collection_w.alter_index(ct.default_index_name, 1000,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: f"<'int' object has no attribute 'items'"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric_type", ["L2", "COSINE", " ", "invalid"])
@pytest.mark.parametrize("index", ct.all_index_types[10:12])
def test_invalid_sparse_metric_type(self, metric_type, index):
"""
        target: test creating a sparse index with an unsupported metric type
        method: create a sparse index with an unsupported metric type
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_default_sparse_schema()
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
data = cf.gen_default_list_sparse_data()
collection_w.insert(data=data)
param = cf.get_index_params_params(index)
params = {"index_type": index, "metric_type": metric_type, "params": param}
error = {ct.err_code: 65535, ct.err_msg: "only IP&BM25 is the supported metric type for sparse index"}
index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("ratio", [-0.5, 1, 3])
@pytest.mark.parametrize("index ", ct.all_index_types[10:12])
def test_invalid_sparse_ratio(self, ratio, index):
"""
        target: test sparse index creation with an unsupported drop_ratio_build value
        method: create a sparse index with drop_ratio_build out of range
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_default_sparse_schema()
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
data = cf.gen_default_list_sparse_data()
collection_w.insert(data=data)
params = {"index_type": index, "metric_type": "IP", "params": {"drop_ratio_build": ratio}}
error = {ct.err_code: 999,
ct.err_msg: f"Out of range in json: param 'drop_ratio_build' ({ratio*1.0}) should be in range [0.000000, 1.000000)"}
index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("inverted_index_algo", ["INVALID_ALGO"])
@pytest.mark.parametrize("index ", ct.all_index_types[10:12])
def test_invalid_sparse_inverted_index_algo(self, inverted_index_algo, index):
"""
        target: test sparse index creation with an unsupported inverted_index_algo value
        method: create a sparse index with an invalid inverted_index_algo
expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_default_sparse_schema()
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
data = cf.gen_default_list_sparse_data()
collection_w.insert(data=data)
params = {"index_type": index, "metric_type": "IP", "params": {"inverted_index_algo": inverted_index_algo}}
error = {ct.err_code: 999,
ct.err_msg: f"sparse inverted index algo {inverted_index_algo} not found or not supported, "
f"supported: [TAAT_NAIVE DAAT_WAND DAAT_MAXSCORE]"}
index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.GPU)
class TestNewIndexAsync(TestcaseBase):
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
def call_back(self):
assert True
"""
******************************************************************
The following cases are used to test `create_index` function
******************************************************************
"""
# @pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index(self, _async):
"""
target: test create index interface
method: create collection and add entities in it, create index
        expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
res, _ = collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name, _async=_async)
if _async:
res.done()
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L0)
# @pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_drop(self, _async):
"""
target: test create index interface
        method: create collection and add entities in it, create index, load and search, then drop index
        expected: search success and drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
res, _ = collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name, _async=_async)
# load and search
if _async:
res.done()
assert len(collection_w.indexes) == 1
collection_w.load()
vectors_s = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)]
search_res, _ = collection_w.search(vectors_s[:ct.default_nq], ct.default_float_vec_field_name,
ct.default_search_params, ct.default_limit)
assert len(search_res) == ct.default_nq
assert len(search_res[0]) == ct.default_limit
collection_w.release()
if _async:
res.done()
assert collection_w.indexes[0].params == default_index_params
collection_w.drop_index(index_name=ct.default_index_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L0)
# @pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_callback(self):
"""
target: test create index interface
        method: create collection and add entities in it, create index asynchronously with a callback
        expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
res, _ = collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name, _async=True,
_callback=self.call_back())
@pytest.mark.tags(CaseLabel.GPU)
class TestIndexString(TestcaseBase):
"""
******************************************************************
The following cases are used to test create index about string
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_with_string_field(self):
"""
        target: test create index on a string field that is not the primary key
        method: 1.create collection and insert data
                2.create an index on the non-primary string field only
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
cf.assert_equal_index(index, collection_w.indexes[0])
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_with_string_before_load(self):
"""
target: test create index with string field before load
method: 1.create collection and insert data
2.create an index with string field before load
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(ct.default_nb)
collection_w.insert(data=data)
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
cf.assert_equal_index(index, collection_w.indexes[0])
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
index_name="vector_flat")
collection_w.load()
assert collection_w.num_entities == default_nb
@pytest.mark.tags(CaseLabel.L1)
def test_load_after_create_index_with_string(self):
"""
target: test load after create index with string field
method: 1.create collection and insert data
2.collection load after create index with string field
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(ct.default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
index_name="vector_flat")
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
collection_w.load()
cf.assert_equal_index(index, collection_w.indexes[0])
assert collection_w.num_entities == default_nb
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_with_string_field_is_primary(self):
"""
        target: test create index on a string field that is the primary key
        method: 1.create collection
                2.insert data
                3.create an index on the primary string field only
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_string_pk_default_collection_schema()
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
cf.assert_equal_index(index, collection_w.indexes[0])
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_or_not_with_string_field(self):
"""
        target: test create index on only one of the string fields
        method: 1.create collection with multiple string fields
                2.insert data
                3.create an index on one string field and leave the other unindexed
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
string_fields = [cf.gen_string_field(name="test_string")]
schema = cf.gen_schema_multi_string_fields(string_fields)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
df = cf.gen_dataframe_multi_string_fields(string_fields=string_fields)
collection_w.insert(df)
self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params)
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_with_same_index_name(self):
"""
        target: test create index on different fields with the same index name
        method: 1.create collection
                2.insert data
                3.create indexes on different fields using the same index name
        expected: the second create index raises an exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
collection_w.create_index(default_float_vec_field_name, default_index_params,
index_name=index_name2,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1, ct.err_msg: "CreateIndex failed"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_different_index_fields(self):
"""
target: test create index with different fields
method: 1.create collection
2.insert data
3.create different indexes with string and float vector field
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
collection_w.create_index(default_float_vec_field_name, default_index_params, index_name=index_name1)
assert collection_w.has_index(index_name=index_name1)[0] == True
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
assert collection_w.has_index(index_name=index_name2)[0] == True
assert len(collection_w.indexes) == 2
@pytest.mark.tags(CaseLabel.L1)
def test_create_different_index_binary_fields(self):
"""
        target: test create indexes on string and binary vector fields
method: 1.create collection
2.insert data
3.create different indexes with string and binary vector field
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data()
collection_w.insert(data=df)
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
assert collection_w.has_index(index_name=index_name2)[0] == True
collection_w.create_index(default_binary_vec_field_name, default_binary_index_params, index_name=index_name3)
assert collection_w.has_index(index_name=index_name3)[0] == True
assert len(collection_w.indexes) == 2
@pytest.mark.tags(CaseLabel.L1)
def test_drop_index_with_string_field(self):
"""
target: test drop index with string field
method: 1.create collection and insert data
                2.create index and drop it with index.drop()
expected: drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
cf.assert_equal_index(index, collection_w.indexes[0])
self.index_wrap.drop()
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_collection_drop_index_with_string(self):
"""
target: test drop index with string field
method: 1.create collection and insert data
                2.create index and drop it with collection.drop_index()
expected: drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
collection_w.drop_index(index_name=index_name2)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_index_with_string_field_empty(self):
"""
        target: test create index on a string field filled with empty values
        method: 1.create collection and insert data with empty strings in the string field
                2.create index on the string field, then drop it
        expected: create and drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
nb = 3000
data = cf.gen_default_list_data(nb)
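        # fill the string field column with empty values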
data[2] = ["" for _ in range(nb)]
collection_w.insert(data=data)
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
assert collection_w.has_index(index_name=index_name2)[0] == True
collection_w.drop_index(index_name=index_name2)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.GPU)
class TestIndexDiskann(TestcaseBase):
"""
******************************************************************
The following cases are used to test create index about diskann
******************************************************************
"""
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
def call_back(self):
assert True
@pytest.mark.tags(CaseLabel.L2)
def test_create_index_with_diskann_normal(self):
"""
target: test create index with diskann
method: 1.create collection and insert data
2.create diskann index , then load data
3.search successfully
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
index, _ = self.index_wrap.init_index(collection_w.collection, default_float_vec_field_name,
ct.default_diskann_index)
log.info(self.index_wrap.params)
cf.assert_equal_index(index, collection_w.indexes[0])
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
ct.default_diskann_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("dim", [ct.min_dim, ct.max_dim])
def test_create_index_diskann_with_max_min_dim(self, dim):
"""
target: test create index with diskann
        method: 1.create collection with the min dim and the max dim (32768) of the vector
                2.create diskann index
        expected: create index successfully
"""
collection_w = self.init_collection_general(prefix, False, dim=dim, is_index=False)[0]
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index)
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L2)
def test_create_index_with_diskann_callback(self, _async):
"""
target: test create index with diskann
method: 1.create collection and insert data
2.create diskann index ,then load
3.search
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
res, _ = collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index,
index_name=ct.default_index_name, _async=_async,
_callback=self.call_back())
if _async:
res.done()
assert len(collection_w.indexes) == 1
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
ct.default_diskann_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
def test_create_diskann_index_drop_with_async(self, _async):
"""
target: test create index interface
        method: create collection and add entities in it, create diskann index, then drop it
        expected: create and drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
res, _ = collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index,
index_name=ct.default_index_name, _async=_async)
if _async:
res.done()
assert len(collection_w.indexes) == 1
collection_w.release()
collection_w.drop_index(index_name=ct.default_index_name)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L2)
def test_create_diskann_index_with_partition(self):
"""
target: test create index with diskann
method: 1.create collection , partition and insert data to partition
2.create diskann index ,then load
expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_schema)
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
df = cf.gen_default_list_data(ct.default_nb)
ins_res, _ = partition_w.insert(df)
assert len(ins_res.primary_keys) == ct.default_nb
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index,
index_name=field_name)
collection_w.load()
assert collection_w.has_index(index_name=field_name)[0] is True
assert len(collection_w.indexes) == 1
collection_w.release()
collection_w.drop_index(index_name=field_name)
assert collection_w.has_index(index_name=field_name)[0] is False
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L2)
def test_drop_diskann_index_with_normal(self):
"""
target: test drop diskann index normal
method: 1.create collection and insert data
                2.create diskann index and drop it with collection.drop_index()
expected: drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, index_name=index_name1)
collection_w.load()
assert len(collection_w.indexes) == 1
collection_w.release()
collection_w.drop_index(index_name=index_name1)
assert collection_w.has_index(index_name=index_name1)[0] is False
@pytest.mark.tags(CaseLabel.L2)
def test_drop_diskann_index_and_create_again(self):
"""
        target: test drop diskann index and create it again
        method: 1.create collection and insert data
                2.create index, drop it with collection.drop_index(), then create it again
        expected: drop and re-create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
collection_w.create_index(default_field_name, ct.default_diskann_index)
collection_w.load()
assert len(collection_w.indexes) == 1
collection_w.release()
collection_w.drop_index()
assert len(collection_w.indexes) == 0
collection_w.create_index(default_field_name, ct.default_diskann_index)
assert len(collection_w.indexes) == 1
@pytest.mark.tags(CaseLabel.L2)
def test_create_more_than_three_index(self):
"""
        target: test create multiple indexes on one collection
        method: 1.create collection and insert data
                2.create different indexes on the vector, string and float fields
        expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, index_name="a")
assert collection_w.has_index(index_name="a")[0] == True
collection_w.create_index(default_string_field_name, default_string_index_params, index_name="b")
assert collection_w.has_index(index_name="b")[0] == True
default_params = {}
collection_w.create_index("float", default_params, index_name="c")
assert collection_w.has_index(index_name="c")[0] == True
@pytest.mark.tags(CaseLabel.L2)
def test_drop_diskann_index_with_partition(self):
"""
        target: test drop diskann index with partition
        method: 1.create collection, partition and insert data to partition
                2.create diskann index and drop it with collection.drop_index()
expected: drop index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_schema)
partition_name = cf.gen_unique_str(prefix)
partition_w = self.init_partition_wrap(collection_w, partition_name)
assert collection_w.has_partition(partition_name)[0]
df = cf.gen_default_list_data()
ins_res, _ = partition_w.insert(df)
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index)
collection_w.load()
assert len(collection_w.indexes) == 1
collection_w.release()
collection_w.drop_index()
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L2)
def test_create_diskann_index_with_binary(self):
"""
target: test create diskann index with binary
method: 1.create collection and insert binary data
2.create diskann index
expected: report an error
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data()
collection_w.insert(data=df)
collection_w.create_index(default_binary_vec_field_name, ct.default_diskann_index, index_name=binary_field_name,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100,
ct.err_msg: "binary vector index does not support metric type: COSINE"})
@pytest.mark.tags(CaseLabel.L2)
def test_create_diskann_index_multithread(self):
"""
        target: test create index interface with multiple threads
        method: create collection and add entities in it, create diskann index from multiple threads
        expected: create index successfully
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(default_nb)
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
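        # build the same diskann index concurrently from multiple threads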
def build(collection_w):
collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index)
threads_num = 10
threads = []
for i in range(threads_num):
t = MyThread(target=build, args=(collection_w,))
threads.append(t)
t.start()
time.sleep(0.2)
for t in threads:
t.join()
@pytest.mark.tags(CaseLabel.L2)
def test_diskann_enable_mmap(self):
"""
        target: test enabling mmap on an index type that does not support it
        method: alter the diskann index to enable mmap
        expected: raise exception
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index,
index_name=ct.default_index_name)
collection_w.set_properties({'mmap.enabled': True})
desc, _ = collection_w.describe()
pro = desc.get("properties")
assert pro["mmap.enabled"] == 'True'
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 104,
ct.err_msg: f"index type DISKANN does not support mmap"})
@pytest.mark.tags(CaseLabel.GPU)
class TestAutoIndex(TestcaseBase):
""" Test case of Auto index """
@pytest.mark.tags(CaseLabel.L0)
def test_create_autoindex_with_no_params(self):
"""
target: test create auto index with no params
method: create index with only one field name
expected: create successfully
"""
collection_w = self.init_collection_general(prefix, is_index=False)[0]
collection_w.create_index(field_name)
actual_index_params = collection_w.index()[0].params
assert default_autoindex_params == actual_index_params
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index_params", cf.gen_autoindex_params())
def test_create_autoindex_with_params(self, index_params):
"""
target: test create auto index with params
method: create index with different params
expected: create successfully
"""
collection_w = self.init_collection_general(prefix, is_index=False)[0]
collection_w.create_index(field_name, index_params)
actual_index_params = collection_w.index()[0].params
log.info(collection_w.index()[0].params)
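        # expected params: user-provided params take effect for a non-AUTOINDEX index_type; otherwise only metric_type overrides the defaults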
expect_autoindex_params = copy.copy(default_autoindex_params)
if index_params.get("index_type"):
if index_params["index_type"] != 'AUTOINDEX':
expect_autoindex_params = index_params
if index_params.get("metric_type"):
expect_autoindex_params["metric_type"] = index_params["metric_type"]
assert actual_index_params == expect_autoindex_params
@pytest.mark.tags(CaseLabel.L2)
def test_create_autoindex_with_invalid_params(self):
"""
target: test create auto index with invalid params
method: create index with invalid params
expected: raise exception
"""
collection_w = self.init_collection_general(prefix, is_index=False)[0]
index_params = {"metric_type": "L2", "nlist": "1024", "M": "100"}
collection_w.create_index(field_name, index_params,
check_task=CheckTasks.err_res,
check_items={"err_code": 1,
"err_msg": "only metric type can be "
"passed when use AutoIndex"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_autoindex_on_binary_vectors(self):
"""
target: test create auto index on binary vectors
method: create index on binary vectors
        expected: create index successfully with HAMMING metric type
"""
collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0]
collection_w.create_index(binary_field_name, {})
assert collection_w.index()[0].params == {'index_type': 'AUTOINDEX', 'metric_type': 'HAMMING'}
@pytest.mark.tags(CaseLabel.L1)
def test_create_autoindex_on_all_vector_type(self):
"""
        target: test create auto index on all vector types
        method: create index on fp16, bf16 and sparse vector fields
        expected: create index successfully
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float16_vec_field("fp16"),
cf.gen_bfloat16_vec_field("bf16"), cf.gen_sparse_vec_field("sparse")]
schema = cf.gen_collection_schema(fields=fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.create_index("fp16", index_name="fp16")
assert all(item in default_autoindex_params.items() for item in
collection_w.index()[0].params.items())
collection_w.create_index("bf16", index_name="bf16")
assert all(item in default_autoindex_params.items() for item in
collection_w.index(index_name="bf16")[0].params.items())
collection_w.create_index("sparse", index_name="sparse")
assert all(item in default_sparse_autoindex_params.items() for item in
collection_w.index(index_name="sparse")[0].params.items())
@pytest.mark.tags(CaseLabel.GPU)
class TestScaNNIndex(TestcaseBase):
""" Test case of Auto index """
@pytest.mark.tags(CaseLabel.L1)
def test_create_scann_index(self):
"""
target: test create scann index
method: create index with only one field name
expected: create successfully
"""
collection_w = self.init_collection_general(prefix, is_index=False)[0]
index_params = {"index_type": "SCANN", "metric_type": "L2",
"params": {"nlist": 1024, "with_raw_data": True}}
collection_w.create_index(default_field_name, index_params)
assert collection_w.has_index()[0] is True
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("nlist", [0, 65537])
def test_create_scann_index_nlist_invalid(self, nlist):
"""
target: test create scann index invalid
method: create index with invalid nlist
expected: report error
"""
collection_w = self.init_collection_general(prefix, is_index=False)[0]
index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": nlist}}
error = {ct.err_code: 999, ct.err_msg: f"Out of range in json: param 'nlist' ({nlist}) should be in range [1, 65536]"}
collection_w.create_index(default_field_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("dim", [3, 127])
def test_create_scann_index_dim_invalid(self, dim):
"""
target: test create scann index invalid
method: create index on vector dim % 2 == 1
expected: report error
"""
collection_w = self.init_collection_general(prefix, is_index=False, dim=dim)[0]
index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": 1024}}
error = {ct.err_code: 1100,
ct.err_msg: f"The dimension of a vector (dim) should be a multiple of sub_dim. "
f"Dimension:{dim}, sub_dim:2: invalid parameter"}
collection_w.create_index(default_field_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.GPU)
class TestInvertedIndexValid(TestcaseBase):
"""
Test create / describe / drop index interfaces with inverted index
"""
@pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"])
def scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=ct.all_dense_vector_types)
def vector_data_type(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("scalar_field_name", [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name])
def test_create_inverted_index_on_all_supported_scalar_field(self, scalar_field_name):
"""
target: test create scalar index all supported scalar field
method: 1.create collection, and create index
expected: create index successfully
"""
collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, is_all_data_type=True)[0]
scalar_index_params = {"index_type": "INVERTED"}
index_name = "scalar_index_name"
collection_w.create_index(scalar_field_name, index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
index_list = self.utility_wrap.list_indexes(collection_w.name)[0]
assert index_name in index_list
collection_w.flush()
result = self.utility_wrap.index_building_progress(collection_w.name, index_name)[0]
# assert False
start = time.time()
while True:
time.sleep(1)
res, _ = self.utility_wrap.index_building_progress(collection_w.name, index_name)
if 0 < res['indexed_rows'] <= default_nb:
break
if time.time() - start > 5:
raise MilvusException(1, f"Index build completed in more than 5s")
@pytest.mark.tags(CaseLabel.L2)
def test_create_multiple_inverted_index(self):
"""
target: test create multiple scalar index
method: 1.create collection, and create index
expected: create index successfully
"""
collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
scalar_index_params = {"index_type": "INVERTED"}
index_name = "scalar_index_name_0"
collection_w.create_index(ct.default_int8_field_name, index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
index_name = "scalar_index_name_1"
collection_w.create_index(ct.default_int32_field_name, index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
@pytest.mark.tags(CaseLabel.L2)
def test_create_all_inverted_index(self):
"""
        target: test create inverted index on all supported scalar fields
        method: 1.create collection, and create an inverted index on each scalar field
expected: create index successfully
"""
collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
scalar_index_params = {"index_type": "INVERTED"}
scalar_fields = [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name]
for i in range(len(scalar_fields)):
index_name = f"scalar_index_name_{i}"
collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
@pytest.mark.tags(CaseLabel.L2)
def test_create_all_scalar_index(self):
"""
        target: test create different types of scalar index
        method: 1.create collection, and create Trie, STL_SORT and INVERTED indexes on different fields
expected: create index successfully
"""
collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
scalar_index = ["Trie", "STL_SORT", "INVERTED"]
scalar_fields = [ct.default_string_field_name, ct.default_int16_field_name,
ct.default_int32_field_name]
for i in range(len(scalar_fields)):
index_name = f"scalar_index_name_{i}"
scalar_index_params = {"index_type": f"{scalar_index[i]}"}
collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
@pytest.mark.tags(CaseLabel.L0)
def test_binary_arith_expr_on_inverted_index(self):
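        """
        target: test binary arithmetic expression query on an inverted indexed field
        method: 1.create collection, insert data and release
                2.create an INVERTED index on the int64 field and load
                3.query with a binary arithmetic expression (int64 % 10 == 0)
        expected: query results match the locally filtered ids
        """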
prefix = "test_binary_arith_expr_on_inverted_index"
nb = 5000
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True,
is_all_data_type=True)
index_name = "test_binary_arith_expr_on_inverted_index"
scalar_index_params = {"index_type": "INVERTED"}
collection_w.release()
collection_w.create_index(ct.default_int64_field_name, index_params=scalar_index_params, index_name=index_name)
collection_w.load()
# query and verify result
res = collection_w.query(expr=f"{ct.default_int64_field_name} % 10 == 0")[0]
query_ids = set(map(lambda x: x[ct.default_int64_field_name], res))
filter_ids = set([_id for _id in insert_ids if _id % 10 == 0])
assert query_ids == set(filter_ids)
class TestBitmapIndex(TestcaseBase):
"""
Functional `BITMAP` index
Author: Ting.Wang
"""
def setup_method(self, method):
super().setup_method(method)
# connect to server before testing
self._connect()
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_on_primary_key_field(self, request, primary_field, auto_id):
"""
target:
1. build BITMAP index on primary key field
method:
1. create an empty collection
2. build `BITMAP` index on primary key field
expected:
1. Primary key field does not support building bitmap index
"""
# init params
collection_name = f"{request.function.__name__}_{primary_field}_{auto_id}"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
)
# build `BITMAP` index on primary key field
self.collection_wrap.create_index(
field_name=primary_field, index_params={"index_type": IndexName.BITMAP}, index_name=primary_field,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapOnPK})
@pytest.mark.tags(CaseLabel.L0)
def test_bitmap_on_not_supported_fields(self, request):
"""
target:
1. build `BITMAP` index on not supported fields
method:
1. create an empty collection with fields:
[`varchar_pk`, `SPARSE_FLOAT_VECTOR`, `FLOAT`, `DOUBLE`, `JSON`, `ARRAY`, `ARRAY_FLOAT`, `ARRAY_DOUBLE`]
2. build different `BITMAP` index params on not supported fields
expected:
1. check build index failed, assert error code and message
"""
# init params
collection_name, primary_field = f"{request.function.__name__}", "varchar_pk"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.bitmap_not_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict}
)
)
# build `BITMAP` index on sparse vector field
for msg, index_params in {
iem.VectorMetricTypeExist: IndexPrams(index_type=IndexName.BITMAP),
iem.SparseFloatVectorMetricType: IndexPrams(index_type=IndexName.BITMAP, metric_type=MetricType.L2),
iem.CheckVectorIndex.format("SparseFloatVector", IndexName.BITMAP): IndexPrams(
index_type=IndexName.BITMAP, metric_type=MetricType.IP)
}.items():
self.collection_wrap.create_index(
field_name=DataType.SPARSE_FLOAT_VECTOR.name, index_params=index_params.to_dict,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: msg}
)
# build `BITMAP` index on not supported scalar fields
for _field_name in self.bitmap_not_support_dtype_names:
self.collection_wrap.create_index(
field_name=_field_name, index_params=IndexPrams(index_type=IndexName.BITMAP).to_dict,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapIndex}
)
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_on_empty_collection(self, request, primary_field, auto_id):
"""
target:
1. create `BITMAP` index on all supported fields
2. build scalar index on loaded collection
method:
1. build and drop `BITMAP` index on an empty collection
2. rebuild `BITMAP` index on loaded collection
3. drop index on loaded collection
4. re-build the same index on loaded collection
expected:
1. build and drop index successful on a not loaded collection
2. build index successful on non-indexed and loaded fields
3. can drop index on loaded collection
"""
# init params
collection_name, nb = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# drop scalars' index
self.drop_multi_index(index_names=list(set(index_params.keys()) - {DataType.FLOAT_VECTOR.name}))
assert len(self.collection_wrap.indexes) == 1
# load collection
self.collection_wrap.load()
# build scalars' index after loading collection
self.build_multi_index(index_params={k: v for k, v in index_params.items() if v.index_type == IndexName.BITMAP})
# reload collection
self.collection_wrap.load()
# re-drop scalars' index
self.drop_multi_index(index_names=list(set(index_params.keys()) - {DataType.FLOAT_VECTOR.name}))
# re-build loaded index
self.build_multi_index(index_params=index_params)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index_obj, field_name", [(DefaultScalarIndexParams.Default, 'INT64_hybrid_index'),
(DefaultScalarIndexParams.INVERTED, 'INT64_inverted'),
(DefaultScalarIndexParams.STL_SORT, 'INT64_stl_sort'),
(DefaultScalarIndexParams.Trie, 'VARCHAR_trie')])
def test_bitmap_offset_cache_on_not_bitmap_fields(self, request, index_obj, field_name):
"""
target:
1. alter offset cache on not `BITMAP` index scalar field
method:
1. build scalar index on scalar field
2. alter offset cache on scalar index field
expected:
1. alter index raises expected error
"""
# init params
collection_name, primary_field = f"{request.function.__name__}_{field_name}", 'INT64_pk'
# create a collection with fields
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, field_name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build scalar index on empty collection
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**index_obj(field_name)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache and raises error
self.collection_wrap.alter_index(
index_name=field_name, extra_params=AlterIndexParams.index_offset_cache(),
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.InvalidOffsetCache}
)
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_offset_cache_on_vector_field(self, request):
"""
target:
1. alter offset cache on vector field
method:
1. build vector index on an empty collection
2. alter offset cache on vector index field
expected:
1. alter index raises expected error
"""
# init params
collection_name, primary_field = f"{request.function.__name__}", 'INT64_pk'
# create a collection with fields
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build index on empty collection
index_params = DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name)
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache and raises error
self.collection_wrap.alter_index(
index_name=DataType.FLOAT_VECTOR.name, extra_params=AlterIndexParams.index_offset_cache(),
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.InvalidOffsetCache}
)
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_alter_after_loading(self, request):
"""
target:
1. alter offset cache on `BITMAP` index scalar
2. alter offset cache on loaded collection failed
method:
1. build scalar index on scalar field
2. alter offset cache on scalar field
3. load collection
4. alter offset cache again
expected:
1. alter index raises expected error after loading collection
"""
# init params
collection_name, primary_field = f"{request.function.__name__}", 'INT64_pk'
# create a collection with fields
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build scalar index on empty collection
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache
for n in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=n, extra_params=AlterIndexParams.index_offset_cache())
self.collection_wrap.load()
# enable offset cache on loaded collection
for n in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(
index_name=n, extra_params=AlterIndexParams.index_offset_cache(),
check_task=CheckTasks.err_res, check_items={ct.err_code: 104, ct.err_msg: iem.AlterOnLoadedCollection})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_insert_after_loading(self, request, primary_field, auto_id):
"""
target:
1. insert data after building `BITMAP` index and loading collection
method:
1. build index and loaded an empty collection
2. insert 3k data
3. check no indexed data
4. flush collection, re-build index and refresh load collection
5. row number of indexed data equal to insert data
expected:
1. insertion is successful
2. segment row number == inserted rows
"""
# init params
collection_name, nb = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# prepare 3k data (> 1024 triggering index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# check no indexed segments
res, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
assert len(res) == 0
# flush collection, segment sealed
self.collection_wrap.flush()
# re-build vector field index
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name))
# load refresh, ensure that loaded indexed segments
self.collection_wrap.load(_refresh=True)
# check segment row number
counts = [int(n.num_rows) for n in self.utility_wrap.get_query_segment_info(collection_name=collection_name)[0]]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_insert_before_loading(self, request, primary_field, auto_id):
"""
target:
1. insert data before building `BITMAP` index and loading collection
method:
1. insert data into an empty collection
2. flush collection, build index and load collection
3. the number of segments equal to shards_num
expected:
1. insertion is successful
2. the number of segments == shards_num
3. segment row number == inserted rows
"""
# init params
collection_name, nb, shards_num = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000, 16
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
),
shards_num=shards_num
)
# prepare data (> 1024 triggering index building)
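        # generate explicit pk values whose python type matches the pk field (str for a varchar pk, int for an int64 pk)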
pk_type = "str" if primary_field.startswith(DataType.VARCHAR.name.lower()) else "int"
default_values = {} if auto_id else {primary_field: [eval(f"{pk_type}({n})") for n in range(nb)]}
self.collection_wrap.insert(
data=cf.gen_values(self.collection_wrap.schema, nb=nb, default_values=default_values),
check_task=CheckTasks.check_insert_result
)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# get segment info
segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
# check segment counts == shards_num
assert len(segment_info) == shards_num
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
@pytest.mark.parametrize("shards_num, nb", [(2, 3791), (16, 1600), (16, 10)])
def test_bitmap_primary_field_data_repeated(self, request, primary_field, shards_num, nb):
"""
target:
1. the same pk value is inserted into the same shard
method:
1. generate the same pk value and insert data into an empty collection
2. flush collection, build index and load collection
3. the number of segments equal to 1
4. row number of indexed data equal to insert data
expected:
1. insertion is successful
2. the number of segments == 1
3. segment row number == inserted rows
"""
# init params
collection_name = f"{request.function.__name__}_{primary_field}_{shards_num}_{nb}"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
),
shards_num=shards_num
)
# prepare data (> 1024 triggering index building)
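        # use a single repeated pk value so that all rows are routed to the same shard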
pk_key = str(shards_num) if primary_field.startswith(DataType.VARCHAR.name.lower()) else shards_num
self.collection_wrap.insert(
data=cf.gen_values(self.collection_wrap.schema, nb=nb,
default_values={primary_field: [pk_key for _ in range(nb)]}),
check_task=CheckTasks.check_insert_result
)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# get segment info
segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
# check segments count
msg = f"`{collection_name}` Segments count:{len(segment_info)} != 1, pk field data is repeated."
assert len(segment_info) == 1, msg
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
@pytest.mark.parametrize("shards_num, nb", [(1, 1000), (2, 3791), (16, 1600), (16, 10)])
def test_bitmap_primary_field_data_not_repeated(self, request, primary_field, shards_num, nb):
"""
target:
1. different pk values are inserted into the different shards
method:
1. generate different pk values and insert data into an empty collection
2. flush collection, build index and load collection
3. the number of segments equal to shards_num or less than insert data
4. row number of indexed data equal to insert data
expected:
1. insertion is successful
2. the number of segments == shards_num or <= insert data
3. segment row number == inserted rows
"""
# init params
collection_name = f"{request.function.__name__}_{primary_field}_{shards_num}_{nb}"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
),
shards_num=shards_num
)
# prepare data (> 1024 triggering index building)
pk_type = "str" if primary_field.startswith(DataType.VARCHAR.name.lower()) else "int"
self.collection_wrap.insert(
data=cf.gen_values(self.collection_wrap.schema, nb=nb,
default_values={primary_field: [eval(f"{pk_type}({n})") for n in range(nb)]}),
check_task=CheckTasks.check_insert_result
)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# get segment info
segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
# check segments count
if shards_num > nb:
msg = f"`{collection_name}` Segments count:{len(segment_info)} > insert data:{nb}"
assert len(segment_info) <= nb, msg
else:
msg = f"`{collection_name}` Segments count:{len(segment_info)} != shards_num:{shards_num}"
assert len(segment_info) == shards_num, msg
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("extra_params, name", [(AlterIndexParams.index_offset_cache(), 'offset_cache_true'),
(AlterIndexParams.index_offset_cache(False), 'offset_cache_false'),
(AlterIndexParams.index_mmap(), 'mmap_true'),
(AlterIndexParams.index_mmap(False), 'mmap_false')])
def test_bitmap_alter_index(self, request, extra_params, name):
"""
target:
1. alter index and rebuild index again
- `{indexoffsetcache.enabled: <bool>}`
- `{mmap.enabled: <bool>}`
method:
1. create a collection with scalar fields
2. build BITMAP index on scalar fields
3. altering index `indexoffsetcache`/ `mmap`
4. insert some data and flush
5. rebuild indexes with the same params again
6. check altering index
7. load collection
expected:
1. alter index not failed
2. rebuild index not failed
3. load not failed
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}_{name}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache / mmap
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=extra_params)
# prepare data (> 1024 triggering index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# rebuild `BITMAP` index
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# check alter index
scalar_indexes = [{i.field_name: i.params} for i in self.collection_wrap.indexes if
i.field_name in self.bitmap_support_dtype_names]
msg = f"Scalar indexes: {scalar_indexes}, expected all to contain {extra_params}"
assert len([i for i in scalar_indexes for v in i.values() if not cf.check_key_exist(extra_params, v)]) == 0, msg
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_alter_cardinality_limit(self, request):
"""
target:
1. alter index `bitmap_cardinality_limit` failed
method:
1. create a collection with scalar fields
2. build BITMAP index on scalar fields
3. altering index `bitmap_cardinality_limit`
expected:
1. alter index failed with param `bitmap_cardinality_limit`
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# alter `bitmap_cardinality_limit` failed
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(
index_name=index_name, extra_params={"bitmap_cardinality_limit": 10}, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100, ct.err_msg: iem.NotConfigable.format("bitmap_cardinality_limit")})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001])
@pytest.mark.skip("need hybrid index config, not available now")
def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit):
"""
target:
1. check auto index setting `bitmap_cardinality_limit` param
method:
1. create a collection with scalar fields
            2. build scalar index with `bitmap_cardinality_limit`
expected:
1. build index failed
"""
# init params
collection_name = f"{request.function.__name__}_{str(bitmap_cardinality_limit).replace('-', '_')}"
primary_field, nb = "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, DataType.INT64.name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build scalar index and check failed
self.collection_wrap.create_index(
field_name=DataType.INT64.name, index_name=DataType.INT64.name,
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit})
assert self.collection_wrap.index()[0].params == {'bitmap_cardinality_limit': str(bitmap_cardinality_limit),
'index_type': IndexName.AUTOINDEX}
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index_params, name", [({"index_type": IndexName.AUTOINDEX}, "AUTOINDEX"), ({}, "None")])
def test_bitmap_cardinality_limit_check(self, request, index_params, name):
"""
target:
1. check that only one `bitmap_cardinality_limit` value can be set on a field
method:
1. create a collection with scalar fields
2. build scalar index with `bitmap_cardinality_limit`
3. re-build the scalar index with a different `bitmap_cardinality_limit`
4. drop all scalar indexes
5. build scalar index with a different `bitmap_cardinality_limit`
expected:
1. re-building the scalar index fails
2. after dropping the scalar indexes, rebuilding with a different `bitmap_cardinality_limit` succeeds
"""
# init params
collection_name, primary_field = f"{request.function.__name__}_{name}", "int64_pk"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
for scalar_field in self.bitmap_support_dtype_names:
# build scalar index
self.collection_wrap.create_index(field_name=scalar_field, index_name=scalar_field,
index_params={**index_params, "bitmap_cardinality_limit": 200})
# build scalar index with different `bitmap_cardinality_limit`
self.collection_wrap.create_index(field_name=scalar_field, index_name=scalar_field,
index_params={**index_params, "bitmap_cardinality_limit": 300},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535, ct.err_msg: iem.OneIndexPerField})
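# drop all scalar indexes so that a different `bitmap_cardinality_limit` can be set on rebuild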
self.drop_multi_index(self.bitmap_support_dtype_names)
# re-build scalar index
for scalar_field in self.bitmap_support_dtype_names:
self.collection_wrap.create_index(field_name=scalar_field, index_name=scalar_field,
index_params={**index_params, "bitmap_cardinality_limit": 300})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 100, 1000])
@pytest.mark.parametrize("index_params, name", [({"index_type": IndexName.AUTOINDEX}, "AUTOINDEX"), ({}, "None")])
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit, index_params, name):
"""
target:
1. check that building an auto index with `bitmap_cardinality_limit` does not fail
method:
1. create a collection with scalar fields
2. insert some data and flush
3. build vector index
4. build scalar index with `bitmap_cardinality_limit`
expected:
1. building the scalar indexes does not fail
2. loading the collection does not fail
Notice:
This case does not automatically check whether the parameter takes effect as expected;
it only verifies that the indexes are built successfully.
"""
# init params
collection_name = f"{request.function.__name__}_{bitmap_cardinality_limit}_{name}"
primary_field, nb = "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 rows to trigger index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build vector index
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
# build scalar index
for scalar_field in self.bitmap_support_dtype_names:
self.collection_wrap.create_index(
field_name=scalar_field, index_name=scalar_field,
index_params={**index_params, "bitmap_cardinality_limit": bitmap_cardinality_limit})
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("config, cardinality_data_range, name",
[({"bitmap_cardinality_limit": 1000}, (-128, 127), 1000),
({"bitmap_cardinality_limit": 100}, (-128, 127), 100),
({}, (1, 100), "None_100"), ({}, (1, 99), "None_99")])
def test_bitmap_cardinality_limit_low_data(self, request, config, name, cardinality_data_range):
"""
target:
1. check building an auto index with `bitmap_cardinality_limit` (default value=100) on low-cardinality data
method:
1. create a collection with scalar fields
2. insert low-cardinality data and flush
3. build vector index
4. build scalar index with `bitmap_cardinality_limit`
expected:
1. building the scalar indexes does not fail
2. loading the collection does not fail
Notice:
This case does not automatically check whether the parameter takes effect as expected;
it only verifies that the indexes are built successfully.
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}_{name}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 rows to trigger index building)
low_cardinality = [random.randint(*cardinality_data_range) for _ in range(nb)]
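# reuse the same low-cardinality values for every supported scalar field so the data cardinality stays within `cardinality_data_range`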
self.collection_wrap.insert(
data=cf.gen_values(
self.collection_wrap.schema, nb=nb,
default_values={
# set all INT values
**{k.name: low_cardinality for k in
[DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64]},
# set VARCHAR value
DataType.VARCHAR.name: [str(i) for i in low_cardinality],
# set all ARRAY + INT values
**{f"{DataType.ARRAY.name}_{k.name}": [[i] for i in low_cardinality] for k in
[DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64]},
# set ARRAY + VARCHAR values
f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}": [[str(i)] for i in low_cardinality],
}),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build vector index
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
# build scalar index
for scalar_field in self.bitmap_support_dtype_names:
self.collection_wrap.create_index(
field_name=scalar_field, index_name=scalar_field,
index_params={"index_type": IndexName.AUTOINDEX, **config})
# load collection
self.collection_wrap.load()