From 6da1c66357d9dfe74b70b59ecb027ada76eb72e0 Mon Sep 17 00:00:00 2001 From: ThreadDao Date: Mon, 24 May 2021 17:44:56 +0800 Subject: [PATCH] Add test cases for collection primary and data (#5371) 1. update case for collection desc 2. test collection with primary 3. test collection with data See also: #5345 #5349 #5350 #5367 Signed-off-by: ThreadDao <yufen.zong@zilliz.com> --- tests20/python_client/common/common_func.py | 32 +- tests20/python_client/common/common_type.py | 1 + .../testcases/test_collection.py | 381 +++++++++++++++++- 3 files changed, 401 insertions(+), 13 deletions(-) diff --git a/tests20/python_client/common/common_func.py b/tests20/python_client/common/common_func.py index 03b31ec123..0eae686a3f 100644 --- a/tests20/python_client/common/common_func.py +++ b/tests20/python_client/common/common_func.py @@ -18,38 +18,38 @@ def gen_unique_str(str_value=None): return "test_" + prefix if str_value is None else str_value + "_" + prefix -def gen_int64_field(name=ct.default_int64_field, is_primary=False, description=ct.int_field_desc): +def gen_int64_field(name=ct.default_int64_field, is_primary=False, description=ct.default_desc): int64_field = FieldSchema(name=name, dtype=DataType.INT64, description=description, is_primary=is_primary) return int64_field -def gen_float_field(name=ct.default_float_field, is_primary=False, description=ct.float_field_desc): +def gen_float_field(name=ct.default_float_field, is_primary=False, description=ct.default_desc): float_field = FieldSchema(name=name, dtype=DataType.FLOAT, description=description, is_primary=is_primary) return float_field def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim, - description=ct.float_vec_field_desc): + description=ct.default_desc): float_vec_field = FieldSchema(name=name, dtype=DataType.FLOAT_VECTOR, description=description, dim=dim, is_primary=is_primary) return float_vec_field def gen_binary_vec_field(name=ct.default_binary_vec_field_name, 
is_primary=False, dim=ct.default_dim, - description=ct.binary_vec_field_desc): + description=ct.default_desc): binary_vec_field = FieldSchema(name=name, dtype=DataType.BINARY_VECTOR, description=description, dim=dim, is_primary=is_primary) return binary_vec_field -def gen_default_collection_schema(description=ct.default_collection_desc, primary_field=None): +def gen_default_collection_schema(description=ct.default_desc, primary_field=None): fields = [gen_int64_field(), gen_float_field(), gen_float_vec_field()] schema = CollectionSchema(fields=fields, description=description, primary_field=primary_field) return schema -def gen_collection_schema(fields, description=ct.collection_desc, **kwargs): - schema = CollectionSchema(fields=fields, description=description, **kwargs) +def gen_collection_schema(fields, primary_field=None, description=ct.default_desc): + schema = CollectionSchema(fields=fields, primary_field=primary_field, description=description) return schema @@ -103,6 +103,16 @@ def get_vectors(num, dim, is_normal=True): return vectors.tolist() +def gen_binary_vectors(num, dim): + raw_vectors = [] + binary_vectors = [] + for i in range(num): + raw_vector = [random.randint(0, 1) for i in range(dim)] + raw_vectors.append(raw_vector) + binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist())) + return raw_vectors, binary_vectors + + def gen_invalid_field_types(): field_types = [ 6, @@ -116,6 +126,14 @@ def gen_invalid_field_types(): return field_types +def gen_all_type_fields(): + fields = [] + for k, v in DataType.__members__.items(): + field = FieldSchema(name=k.lower(), dtype=v) + fields.append(field) + return fields + + def modify_file(file_name_list, input_content=""): if not isinstance(file_name_list, list): log.error("[modify_file] file is not a list.") diff --git a/tests20/python_client/common/common_type.py b/tests20/python_client/common/common_type.py index 17a17135cd..28a9d73b42 100644 --- a/tests20/python_client/common/common_type.py +++ 
b/tests20/python_client/common/common_type.py @@ -21,6 +21,7 @@ default_binary_vec_field_name = "binary_vector" default_partition_name = "_default" default_tag = "1970_01_01" row_count = "row_count" +default_desc = "" default_collection_desc = "default collection" default_binary_desc = "default binary collection" collection_desc = "collection" diff --git a/tests20/python_client/testcases/test_collection.py b/tests20/python_client/testcases/test_collection.py index 1d66dd25cc..73a045a575 100644 --- a/tests20/python_client/testcases/test_collection.py +++ b/tests20/python_client/testcases/test_collection.py @@ -1,4 +1,5 @@ import pytest +from milvus import DataType from pymilvus_orm import FieldSchema from base.client_request import ApiReq @@ -68,6 +69,24 @@ class TestCollectionParams(ApiReq): def get_invalid_field_type(self, request): yield request.param + @pytest.fixture( + scope="function", + params=cf.gen_all_type_fields() + ) + def get_unsupported_primary_field(self, request): + if request.param.dtype == DataType.INT64: + pytest.skip("int64 type is valid primary key") + yield request.param + + @pytest.fixture( + scope="function", + params=ct.get_invalid_strs + ) + def get_invalid_dim(self, request): + if request.param == 1: + request.param = 0 + yield request.param + @pytest.mark.tags(CaseLabel.L0) @pytest.mark.xfail(reason="issue #5224") def test_collection(self): @@ -107,7 +126,7 @@ class TestCollectionParams(ApiReq): assert "invalid" or "illegal" in str(ex) @pytest.mark.tags(CaseLabel.L0) - @pytest.mark.xfail(reason="issue #5231 #5241") + @pytest.mark.xfail(reason="issue #5241 #5367") def test_collection_dup_name(self): """ target: test collection with dup name @@ -119,11 +138,28 @@ class TestCollectionParams(ApiReq): c_name = collection.name assert_default_collection(collection) dup_collection, _ = self.collection.collection_init(c_name) - assert_default_collection(dup_collection, c_name) assert collection.name == dup_collection.name - assert collection.name 
in self.utility.list_collections() assert collection.schema == dup_collection.schema + assert collection.num_entities == dup_collection.num_entities assert id(collection) == id(dup_collection) + assert collection.name in self.utility.list_collections() + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #5231") + def test_collection_dup_name_with_desc(self): + """ + target: test collection with dup name + method: 1. default schema with desc 2. dup name collection + expected: desc consistent + """ + self._connect() + schema = cf.gen_default_collection_schema(description=ct.collection_desc) + collection = self._collection(schema=schema) + assert_default_collection(collection, exp_schema=schema) + c_name = collection.name + dup_collection, _ = self.collection.collection_init(c_name) + assert_default_collection(dup_collection, c_name, exp_schema=schema) + assert collection.description == dup_collection.description @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_new_schema(self): @@ -195,7 +231,7 @@ class TestCollectionParams(ApiReq): assert_default_collection(collection, c_name) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.xfail(reason="issue #5231, #5241") + @pytest.mark.xfail(reason="issue #5241 #5367") def test_collection_dup_name_same_schema(self): """ target: test collection with dup name and same schema @@ -203,7 +239,7 @@ class TestCollectionParams(ApiReq): expected: two collection object is available """ self._connect() - collection = self._collection() + collection = self._collection(schema=default_schema) c_name = collection.name assert_default_collection(collection) dup_collection, _ = self.collection.collection_init(c_name, schema=default_schema) @@ -348,6 +384,49 @@ class TestCollectionParams(ApiReq): has, _ = self.utility.has_collection(c_name) assert not has + @pytest.mark.tags(CaseLabel.L0) + def test_collection_only_float_vector(self): + """ + target: test collection just with float-vec field + method: create with 
float-vec fields + expected: no exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + schema = cf.gen_collection_schema(fields=[cf.gen_float_vec_field()]) + collection, _ = self.collection.collection_init(c_name, schema=schema) + assert_default_collection(collection, c_name, exp_schema=schema) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #5345") + def test_collection_multi_float_vectors(self): + """ + target: test collection with multi float vectors + method: create collection with two float-vec fields + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + fields = [cf.gen_float_vec_field(), cf.gen_float_vec_field(name="tmp")] + schema = cf.gen_collection_schema(fields=fields) + ex, _ = self.collection.collection_init(c_name, schema=schema) + log.debug(ex) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #5345") + def test_collection_mix_vectors(self): + """ + target: test collection with mix vectors + method: create with float and binary vec + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + fields = [cf.gen_float_vec_field(), cf.gen_binary_vec_field()] + schema = cf.gen_collection_schema(fields=fields) + ex, _ = self.collection.collection_init(c_name, schema=schema) + log.debug(ex) + @pytest.mark.tags(CaseLabel.L0) @pytest.mark.xfail(reason="issue #5285") def test_collection_without_vectors(self): @@ -362,6 +441,180 @@ class TestCollectionParams(ApiReq): ex, _ = self.collection.collection_init(c_name, schema=schema) assert "must" in str(ex) + @pytest.mark.tags(CaseLabel.L0) + def test_collection_primary_field(self): + """ + target: test collection with primary field + method: specify primary field + expected: collection.primary_field + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + schema = cf.gen_default_collection_schema(primary_field=ct.default_int64_field) + collection, _ = 
self.collection.collection_init(c_name, schema=schema) + assert collection.primary_field.name == ct.default_int64_field + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_unsupported_primary_field(self, get_unsupported_primary_field): + """ + target: test collection with unsupported primary field type + method: specify non-int64 as primary field + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + field = get_unsupported_primary_field + schema = cf.gen_collection_schema(fields=[field], primary_field=field.name) + ex, _ = self.collection.collection_init(c_name, schema=schema) + assert "the data type of primary key should be int64" in str(ex) + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_multi_primary_fields(self): + """ + target: test collection with multi primary + method: collection with two primary fields + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + int_field = cf.gen_int64_field(is_primary=True) + float_vec_field = cf.gen_float_vec_field(is_primary=True) + schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) + ex, _ = self.collection.collection_init(c_name, schema=schema) + assert "there are more than one primary key" in str(ex) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #5349") + def test_collection_primary_inconsistent(self): + """ + target: test collection with different primary field setting + method: 1. set A field is_primary 2. 
set primary_field is B + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + int_field = cf.gen_int64_field(name="int", is_primary=True) + float_vec_field = cf.gen_float_vec_field(name="vec") + schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field="vec") + ex, _ = self.collection.collection_init(c_name, schema=schema) + log.info(schema) + log.info(ex.primary_field.name) + assert "there are more than one primary key" in str(ex) + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_field_primary_false(self): + """ + target: test collection with primary false + method: define field with is_primary false + expected: no exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + int_field = cf.gen_int64_field(name="int") + float_vec_field = cf.gen_float_vec_field() + schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) + collection, _ = self.collection.collection_init(c_name, schema=schema) + assert collection.primary_field is None + assert collection.schema.auto_id + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #5350") + def test_collection_field_invalid_primary(self, get_invalid_string): + """ + target: test collection with invalid primary + method: define field with is_primary=non-bool + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + int_field = cf.gen_int64_field(name="int", is_primary=get_invalid_string) + float_vec_field = cf.gen_float_vec_field() + schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) + ex, _ = self.collection.collection_init(c_name, schema=schema) + log.info(str(ex)) + + @pytest.mark.tags(CaseLabel.L0) + def test_collection_vector_without_dim(self): + """ + target: test collection without dimension + method: define vector field without dim + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + float_vec_field = 
FieldSchema(name=ct.default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR) + schema = cf.gen_collection_schema(fields=[float_vec_field]) + ex, _ = self.collection.collection_init(c_name, schema=schema) + assert "dimension is not defined in field type params" in str(ex) + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_vector_invalid_dim(self, get_invalid_dim): + """ + target: test collection with invalid dimension + method: define float-vec field with invalid dimension + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + float_vec_field = cf.gen_float_vec_field(dim=get_invalid_dim) + schema = cf.gen_collection_schema(fields=[float_vec_field]) + ex, _ = self.collection.collection_init(c_name, schema=schema) + assert "dim must be of int" in str(ex) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("dim", [-1, 32769]) + def test_collection_vector_out_bounds_dim(self, dim): + """ + target: test collection with out of bounds dim + method: invalid dim -1 and 32769 + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + float_vec_field = cf.gen_float_vec_field(dim=dim) + schema = cf.gen_collection_schema(fields=[float_vec_field]) + ex, _ = self.collection.collection_init(c_name, schema=schema) + assert "invalid dimension: {}. 
should be in range 1 ~ 32768".format(dim) in str(ex) + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_non_vector_field_dim(self): + """ + target: test collection with dim for non-vector field + method: define int64 field with dim + expected: no exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + int_field = FieldSchema(name="int", dtype=DataType.INT64, dim=ct.default_dim) + float_vec_field = cf.gen_float_vec_field() + schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) + collection, _ = self.collection.collection_init(c_name, schema=schema) + assert_default_collection(collection, c_name, exp_schema=schema) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("desc", [None, ct.collection_desc]) + def test_collection_desc(self, desc): + """ + target: test collection with none description + method: create with none description + expected: assert default description + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + schema = cf.gen_default_collection_schema(description=desc) + collection, _ = self.collection.collection_init(c_name, schema=schema) + assert_default_collection(collection, c_name, exp_schema=schema) + + # TODO + @pytest.mark.tags(CaseLabel.L1) + def test_collection_long_desc(self): + """ + target: test collection with long desc + method: create with long desc + expected: + """ + pass + @pytest.mark.tags(CaseLabel.L0) @pytest.mark.xfail(reason="issue #5302") def test_collection_with_dataframe(self): @@ -400,7 +653,22 @@ class TestCollectionOperation(ApiReq): ****************************************************************** """ - # #5237 + def teardown_method(self): + if self.collection is not None and self.collection.collection is not None: + self.collection.drop() + + def setup_method(self): + pass + + @pytest.fixture( + scope="function", + params=ct.get_invalid_strs + ) + def get_non_df(self, request): + if request.param is None: + pytest.skip("skip None") + yield request.param + 
@pytest.mark.tags(CaseLabel.L1) def test_collection_without_connection(self): """ @@ -434,6 +702,7 @@ class TestCollectionOperation(ApiReq): assert c_name not in self.utility.list_collections() @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #xxx") def test_collection_dup_name_drop(self): """ target: test collection with dup name, and drop @@ -444,6 +713,7 @@ class TestCollectionOperation(ApiReq): self._connect() collection = self._collection() assert_default_collection(collection) + log.info(collection.schema) dup_collection, _ = self.collection.collection_init(collection.name) assert_default_collection(dup_collection, collection.name) dup_collection.drop() @@ -451,3 +721,102 @@ class TestCollectionOperation(ApiReq): assert not has with pytest.raises(Exception, match="can't find collection"): collection.num_entities + + @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.xfail(reason="issue #5302") + def test_collection_schema_insert_dataframe(self): + """ + target: test collection create and insert dataframe + method: 1. create by schema 2. 
insert dataframe + expected: assert num_entities + """ + self._connect() + nb = ct.default_nb + collection = self._collection() + assert_default_collection(collection) + df = cf.gen_default_dataframe_data(nb) + self.collection.insert(data=df) + assert collection.num_entities == nb + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue #5302") + def test_collection_created_by_dataframe(self): + """ + target: test collection with dataframe + method: create collection with dataframe + expected: create successfully + """ + self._connect() + nb = ct.default_nb + c_name = cf.gen_unique_str(prefix) + df = cf.gen_default_dataframe_data(nb) + schema = cf.gen_default_collection_schema() + collection, _ = self.collection.collection_init(name=c_name, data=df) + assert_default_collection(collection, exp_name=c_name, exp_num=nb, exp_schema=schema) + + # TODO + @pytest.mark.tags(CaseLabel.L0) + def _test_collection_created_by_invalid_dataframe(self, get_invalid_df): + """ + target: test create collection by invalid dataframe + method: invalid dataframe type create collection + expected: raise exception + """ + pass + + @pytest.mark.tags(CaseLabel.L0) + def test_collection_created_by_non_dataframe(self, get_non_df): + """ + target: test create collection by invalid dataframe + method: non-dataframe type create collection + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + ex, _ = self.collection.collection_init(name=c_name, schema=None, data=get_non_df) + assert "Data of not pandas.DataFrame type should bepassed into the schema" in str(ex) + + @pytest.mark.tags(CaseLabel.L0) + def test_collection_created_by_data_list(self): + """ + target: test create collection by data list + method: data type is list-like + expected: raise exception + """ + self._connect() + c_name = cf.gen_unique_str(prefix) + data = cf.gen_default_list_data(nb=100) + ex, _ = self.collection.collection_init(name=c_name, schema=None, data=data) + assert 
"Data of not pandas.DataFrame type should bepassed into the schema" in str(ex) + + @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.xfail(reason="issue #5302") + def test_collection_schema_insert_data(self): + """ + target: test collection create and insert list-like data + method: 1. create by schema 2. insert data + expected: assert num_entities + """ + self._connect() + nb = ct.default_nb + collection = self._collection() + assert_default_collection(collection) + data = cf.gen_default_list_data(nb) + self.collection.insert(data=data) + assert collection.num_entities == nb + + @pytest.mark.tags(CaseLabel.L1) + def test_collection_after_drop(self): + """ + target: test create collection after create and drop + method: 1. create a 2. drop a 3, re-create a + expected: no exception + """ + collection = self._collection() + assert_default_collection(collection) + c_name = collection.name + collection.drop() + assert not self.utility.has_collection(c_name)[0] + re_collection = self._collection(name=c_name) + assert_default_collection(re_collection, c_name) + assert self.utility.has_collection(c_name)[0] \ No newline at end of file