From 3579ae8240912c48ed3864ebd2a0ede02ee7d58c Mon Sep 17 00:00:00 2001 From: jingkl <34296482+jingkl@users.noreply.github.com> Date: Wed, 11 May 2022 21:55:53 +0800 Subject: [PATCH] [test]Add the string testcase of index and query (#16884) Signed-off-by: jingkl --- .../python_client/base/collection_wrapper.py | 26 ++- tests/python_client/base/index_wrapper.py | 16 +- tests/python_client/requirements.txt | 2 +- tests/python_client/testcases/test_index.py | 194 +++++++++++++++++- tests/python_client/testcases/test_query.py | 90 +++++++- 5 files changed, 309 insertions(+), 19 deletions(-) diff --git a/tests/python_client/base/collection_wrapper.py b/tests/python_client/base/collection_wrapper.py index e47697c3d0..76a57021a2 100644 --- a/tests/python_client/base/collection_wrapper.py +++ b/tests/python_client/base/collection_wrapper.py @@ -14,7 +14,7 @@ from pymilvus.orm.types import CONSISTENCY_STRONG from common.common_func import param_info TIMEOUT = 20 - +INDEX_NAME = "_default_idx" # keep small timeout for stability tests # TIMEOUT = 5 @@ -218,9 +218,11 @@ class ApiCollectionWrapper: return res, check_result @trace() - def create_index(self, field_name, index_params, check_task=None, check_items=None, **kwargs): + def create_index(self, field_name, index_params, index_name=None, check_task=None, check_items=None, **kwargs): timeout = kwargs.get("timeout", TIMEOUT * 2) - kwargs.update({"timeout": timeout}) + index_name = INDEX_NAME if index_name is None else index_name + index_name = kwargs.get("index_name", index_name) + kwargs.update({"timeout": timeout, "index_name": index_name}) func_name = sys._getframe().f_code.co_name res, check = api_request([self.collection.create_index, field_name, index_params], **kwargs) @@ -229,17 +231,23 @@ class ApiCollectionWrapper: return res, check_result @trace() - def has_index(self, check_task=None, check_items=None): + def has_index(self, index_name=None, check_task=None, check_items=None, **kwargs): + index_name = INDEX_NAME if index_name is None else index_name + index_name = kwargs.get("index_name", index_name) + kwargs.update({"index_name": index_name}) + func_name = sys._getframe().f_code.co_name - res, check = api_request([self.collection.has_index]) - check_result = ResponseChecker(res, func_name, check_task, check_items, check).run() + res, check = api_request([self.collection.has_index], **kwargs) + check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() return res, check_result @trace() - def drop_index(self, check_task=None, check_items=None, **kwargs): + def drop_index(self, index_name=None, check_task=None, check_items=None, **kwargs): timeout = kwargs.get("timeout", TIMEOUT) - kwargs.update({"timeout": timeout}) - + index_name = INDEX_NAME if index_name is None else index_name + index_name = kwargs.get("index_name", index_name) + kwargs.update({"timeout": timeout, "index_name": index_name}) + func_name = sys._getframe().f_code.co_name res, check = api_request([self.collection.drop_index], **kwargs) check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() diff --git a/tests/python_client/base/index_wrapper.py b/tests/python_client/base/index_wrapper.py index 9a0d9e9baf..e12f3369bc 100644 --- a/tests/python_client/base/index_wrapper.py +++ b/tests/python_client/base/index_wrapper.py @@ -6,15 +6,19 @@ from check.func_check import ResponseChecker from utils.api_request import api_request -TIMEOUT = 20 +TIMEOUT = 20 +INDEX_NAME = "_default_idx" class ApiIndexWrapper: index = None - def init_index(self, collection, field_name, index_params, check_task=None, check_items=None, **kwargs): + def init_index(self, collection, field_name, index_params, index_name=None, check_task=None, check_items=None, **kwargs): timeout = kwargs.get("timeout", TIMEOUT * 2) - kwargs.update({"timeout": timeout}) + index_name = INDEX_NAME if index_name is None else index_name + index_name = kwargs.get("index_name", index_name) + kwargs.update({"timeout": timeout, "index_name": index_name}) + """ In order to distinguish the same name of index """ func_name = sys._getframe().f_code.co_name res, is_succ = api_request([Index, collection, field_name, index_params], **kwargs) @@ -24,9 +28,11 @@ class ApiIndexWrapper: index_params=index_params, **kwargs).run() return res, check_result - def drop(self, check_task=None, check_items=None, **kwargs): + def drop(self, index_name=None ,check_task=None, check_items=None, **kwargs): timeout = kwargs.get("timeout", TIMEOUT) - kwargs.update({"timeout": timeout}) + index_name = INDEX_NAME if index_name is None else index_name + index_name = kwargs.get("index_name", index_name) + kwargs.update({"timeout": timeout, "index_name": index_name}) func_name = sys._getframe().f_code.co_name res, is_succ = api_request([self.index.drop], **kwargs) diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index c761aff5ef..31eea3f7f8 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -9,7 +9,7 @@ allure-pytest==2.7.0 pytest-print==0.2.1 pytest-level==0.1.1 pytest-xdist==2.2.1 -pymilvus==2.1.0.dev50 +pymilvus==2.1.0.dev56 pytest-rerunfailures==9.1.1 git+https://github.com/Projectplace/pytest-tags ndg-httpsclient diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 1b41238c6c..abb68f035f 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -23,6 +23,13 @@ uid = "test_index" # BUILD_TIMEOUT = 300 field_name = default_float_vec_field_name binary_field_name = default_binary_vec_field_name +default_string_field_name =ct.default_string_field_name +index_name1=cf.gen_unique_str("float") +index_name2=cf.gen_unique_str("varhar") +index_name3=cf.gen_unique_str("binary") +default_string_index_params ={} +default_binary_schema = cf.gen_default_binary_collection_schema() +default_binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} # query = gen_search_vectors_params(field_name, default_entities, default_top_k, 1) default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} @@ -615,8 +622,6 @@ class TestIndexBase: index = connect.describe_index(collection, "") assert not index # FLAT is the last index_type, drop all indexes in server - @pytest.mark.tags(CaseLabel.L2) - # @pytest.mark.timeout(BUILD_TIMEOUT) def test_create_different_index_repeatedly_B(self, connect, collection): """ target: check if index can be created repeatedly, with the different create_index params @@ -1310,3 +1315,188 @@ class TestIndexAsync: res = future.result() # TODO: log.info(res) + + +class TestIndexString(TestcaseBase): + """ + ****************************************************************** + The following cases are used to test create index about string + ****************************************************************** + """ + @pytest.mark.tags(CaseLabel.L1) + def test_create_index_with_string_field(self): + """ + target: test create index with string field is not primary + method: 1.create collection and insert data + 2.only create an index with string field is not primary + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data() + collection_w.insert(data=data) + index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params) + cf.assert_equal_index(index, collection_w.indexes[0]) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_index_with_string_before_load(self): + """ + target: test create index with string field before load + method: 1.create collection and insert data + 2.create an index with string field before load + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data(ct.default_nb) + collection_w.insert(data=data) + index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params) + cf.assert_equal_index(index, collection_w.collection.indexes[0]) + collection_w.load() + assert collection_w.num_entities==default_nb + + @pytest.mark.tags(CaseLabel.L1) + def test_load_after_create_index_with_string(self): + """ + target: test load after create index with string field + method: 1.create collection and insert data + 2.collection load after create index with string field + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data(ct.default_nb) + collection_w.insert(data=data) + collection_w.load() + index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params) + cf.assert_equal_index(index, collection_w.collection.indexes[0]) + assert collection_w.num_entities==default_nb + + @pytest.mark.tags(CaseLabel.L1) + def test_create_index_with_string_field_is_primary(self): + """ + target: test create index with string field is primary + method: 1.create collection + 2.insert data + 3.only create an index with string field is primary + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + schema = cf.gen_string_pk_default_collection_schema() + collection_w = self.init_collection_wrap(name=c_name, schema=schema) + data = cf.gen_default_list_data() + collection_w.insert(data=data) + index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params) + cf.assert_equal_index(index, collection_w.collection.indexes[0]) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_index_or_not_with_string_field(self): + """ + target: test create index, half of the string fields are indexed and half are not + method: 1.create collection + 2.insert data + 3.half of the indexes are created and half are not in the string fields + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + string_fields = [cf.gen_string_field(name="test_string")] + schema = cf.gen_schema_multi_string_fields(string_fields) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) + df = cf.gen_dataframe_multi_string_fields(string_fields=string_fields) + collection_w.insert(df) + self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_index_with_same_index_name(self): + """ + target: test create index with different fields use same index name + method: 1.create collection + 2.insert data + 3.only create index with different fields use same index name + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data() + collection_w.insert(data=data) + collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2) + collection_w.create_index(default_float_vec_field_name, default_index_params, + index_name=index_name2, + check_task=CheckTasks.err_res, + check_items={ct.err_code: 1, ct.err_msg: "CreateIndex failed"}) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_different_index_fields(self): + """ + target: test create index with different fields + method: 1.create collection + 2.insert data + 3.create different indexes with string and float vector field + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data() + collection_w.insert(data=data) + collection_w.create_index(default_float_vec_field_name, default_index_params, index_name=index_name1) + assert collection_w.has_index(index_name=index_name1)[0]==True + collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2) + assert collection_w.has_index(index_name=index_name2)[0]==True + assert len(collection_w.collection.indexes)==2 + + @pytest.mark.tags(CaseLabel.L1) + def test_create_different_index_binary_fields(self): + """ + target: testing the creation of indexes with string and binary fields + method: 1.create collection + 2.insert data + 3.create different indexes with string and binary vector field + expected: create index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema) + df, _ = cf.gen_default_binary_dataframe_data() + collection_w.insert(data=df) + collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2) + assert collection_w.has_index(index_name=index_name2)[0]==True + collection_w.create_index(default_binary_vec_field_name, default_binary_index_params, index_name=index_name3) + assert collection_w.has_index(index_name=index_name3)[0]==True + assert len(collection_w.collection.indexes)==2 + + @pytest.mark.tags(CaseLabel.L1) + def test_drop_index_with_string_field(self): + """ + target: test drop index with string field + method: 1.create collection and insert data + 2.create index and use index.drop() drop index + expected: drop index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data() + collection_w.insert(data=data) + index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name, default_string_index_params) + cf.assert_equal_index(index, collection_w.collection.indexes[0]) + self.index_wrap.drop() + assert len(collection_w.collection.indexes) == 0 + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_drop_index_with_string(self): + """ + target: test drop index with string field + method: 1.create collection and insert data + 2.create index and uses collection.drop_index () drop index + expected: drop index successfully + """ + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name) + data = cf.gen_default_list_data() + collection_w.insert(data=data) + collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2) + collection_w.drop_index(index_name=index_name2) + assert len(collection_w.collection.indexes) == 0 + + + + + diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index 2e4503fa29..2edfc18413 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -16,6 +16,9 @@ import utils.util_pymilvus as ut prefix = "query" exp_res = "exp_res" default_term_expr = f'{ct.default_int64_field_name} in [0, 1]' +default_mix_expr = "int64 >= 0 && varchar >= \"0\"" +default_invaild_expr = "varchar >= 0" +default_string_term_expr = f'{ct.default_string_field_name} in [\"0\", \"1\"]' default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} @@ -23,6 +26,7 @@ default_entities = ut.gen_entities(ut.default_nb, is_normal=True) default_pos = 5 default_int_field_name = "int64" default_float_field_name = "float" +default_string_field_name = "varchar" class TestQueryParams(TestcaseBase): @@ -237,8 +241,9 @@ class TestQueryParams(TestcaseBase): ct.default_int64_field_name: pd.Series(data=[i for i in range(ct.default_nb)]), ct.default_int32_field_name: pd.Series(data=[np.int32(i) for i in range(ct.default_nb)], dtype="int32"), ct.default_int16_field_name: pd.Series(data=[np.int16(i) for i in range(ct.default_nb)], dtype="int16"), - ct.default_float_field_name: pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"), + ct.default_float_field_name: pd.Series(data=[np.float32(i) for i in range(ct.default_nb)], dtype="float32"), ct.default_double_field_name: pd.Series(data=[np.double(i) for i in range(ct.default_nb)], dtype="double"), + ct.default_string_field_name: pd.Series(data=[str(i) for i in range(ct.default_nb)], dtype="string"), ct.default_float_vec_field_name: cf.gen_vectors(ct.default_nb, ct.default_dim) }) self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df, @@ -248,7 +253,7 @@ class TestQueryParams(TestcaseBase): # query by non_primary non_vector scalar field non_primary_field = [ct.default_int32_field_name, ct.default_int16_field_name, - ct.default_float_field_name, ct.default_double_field_name] + ct.default_float_field_name, ct.default_double_field_name, ct.default_string_field_name] # exp res: first two rows and all fields expect last vec field res = df.iloc[:2, :-1].to_dict('records') @@ -1190,3 +1195,84 @@ class TestQueryOperation(TestcaseBase): collection_w.query(f'{ct.default_int64_field_name} in [1]', check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + +class TestqueryString(TestcaseBase): + """ + ****************************************************************** + The following cases are used to test query with string + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + def test_query_string_is_not_primary(self): + """ + target: test query data with string field is not primary + method: create collection and insert data + collection.load() + query with string expr in string field is not primary + expected: query successfully + """ + + collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2] + res = vectors[0].iloc[:2, :3].to_dict('records') + output_fields = [default_float_field_name, default_string_field_name] + collection_w.query(default_string_term_expr, output_fields=output_fields, + check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(default_string_field_name)) + def test_query_string_is_primary(self, expression): + """ + target: test query with output field only primary field + method: specify string primary field as output field + expected: return string primary field + """ + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, primary_field=ct.default_string_field_name)[0:2] + res, _ = collection_w.query(expression, output_fields=[ct.default_string_field_name]) + assert list(res[0].keys()) == [ct.default_string_field_name] + + + @pytest.mark.tags(CaseLabel.L1) + def test_query_string_with_mix_expr(self): + """ + target: test query data + method: create collection and insert data + query with mix expr in string field and int field + expected: query successfully + """ + collection_w, vectors = self.init_collection_general(prefix, insert_data=True, primary_field=ct.default_string_field_name)[0:2] + res = vectors[0].iloc[:, 1:3].to_dict('records') + output_fields = [default_float_field_name, default_string_field_name] + collection_w.query(default_mix_expr, output_fields=output_fields, + check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expression", cf.gen_invaild_string_expressions()) + def test_query_with_invalid_string_expr(self, expression): + """ + target: test query data + method: create collection and insert data + query with invalid expr + expected: Raise exception + """ + collection_w = self.init_collection_general(prefix, insert_data=True)[0] + collection_w.query(expression, check_task=CheckTasks.err_res, + check_items={ct.err_code: 1, ct.err_msg: "type mismatch"}) + + + @pytest.mark.tags(CaseLabel.L1) + def test_query_string_expr_with_binary(self): + """ + target: test query string expr with binary + method: query string expr with binary + expected: verify query successfully + """ + collection_w, vectors= self.init_collection_general(prefix, insert_data=True, is_binary=True, is_index=True)[0:2] + collection_w.create_index(ct.default_binary_vec_field_name, binary_index_params) + collection_w.load() + assert collection_w.has_index()[0] + res, _ = collection_w.query(default_string_term_expr, output_fields=[ct.default_binary_vec_field_name]) + assert len(res) == 2 + +