From 6da1c66357d9dfe74b70b59ecb027ada76eb72e0 Mon Sep 17 00:00:00 2001
From: ThreadDao <yufen.zong@zilliz.com>
Date: Mon, 24 May 2021 17:44:56 +0800
Subject: [PATCH] Add test cases for collection primary and data (#5371)

1. update case for collection desc
2. test collection with primary
3. test collection with data

See also: #5345 #5349 #5350 #5367

Signed-off-by: ThreadDao <yufen.zong@zilliz.com>
---
 tests20/python_client/common/common_func.py   |  32 +-
 tests20/python_client/common/common_type.py   |   1 +
 .../testcases/test_collection.py              | 381 +++++++++++++++++-
 3 files changed, 401 insertions(+), 13 deletions(-)

diff --git a/tests20/python_client/common/common_func.py b/tests20/python_client/common/common_func.py
index 03b31ec123..0eae686a3f 100644
--- a/tests20/python_client/common/common_func.py
+++ b/tests20/python_client/common/common_func.py
@@ -18,38 +18,38 @@ def gen_unique_str(str_value=None):
     return "test_" + prefix if str_value is None else str_value + "_" + prefix
 
 
-def gen_int64_field(name=ct.default_int64_field, is_primary=False, description=ct.int_field_desc):
+def gen_int64_field(name=ct.default_int64_field, is_primary=False, description=ct.default_desc):
     int64_field = FieldSchema(name=name, dtype=DataType.INT64, description=description, is_primary=is_primary)
     return int64_field
 
 
-def gen_float_field(name=ct.default_float_field, is_primary=False, description=ct.float_field_desc):
+def gen_float_field(name=ct.default_float_field, is_primary=False, description=ct.default_desc):
     float_field = FieldSchema(name=name, dtype=DataType.FLOAT, description=description, is_primary=is_primary)
     return float_field
 
 
 def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
-                        description=ct.float_vec_field_desc):
+                        description=ct.default_desc):
     float_vec_field = FieldSchema(name=name, dtype=DataType.FLOAT_VECTOR, description=description, dim=dim,
                                   is_primary=is_primary)
     return float_vec_field
 
 
 def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False, dim=ct.default_dim,
-                         description=ct.binary_vec_field_desc):
+                         description=ct.default_desc):
     binary_vec_field = FieldSchema(name=name, dtype=DataType.BINARY_VECTOR, description=description, dim=dim,
                                    is_primary=is_primary)
     return binary_vec_field
 
 
-def gen_default_collection_schema(description=ct.default_collection_desc, primary_field=None):
+def gen_default_collection_schema(description=ct.default_desc, primary_field=None):
     fields = [gen_int64_field(), gen_float_field(), gen_float_vec_field()]
     schema = CollectionSchema(fields=fields, description=description, primary_field=primary_field)
     return schema
 
 
-def gen_collection_schema(fields, description=ct.collection_desc, **kwargs):
-    schema = CollectionSchema(fields=fields, description=description, **kwargs)
+def gen_collection_schema(fields, primary_field=None, description=ct.default_desc):
+    schema = CollectionSchema(fields=fields, primary_field=primary_field, description=description)
     return schema
 
 
@@ -103,6 +103,16 @@ def get_vectors(num, dim, is_normal=True):
     return vectors.tolist()
 
 
+def gen_binary_vectors(num, dim):
+    raw_vectors = []
+    binary_vectors = []
+    for i in range(num):
+        raw_vector = [random.randint(0, 1) for i in range(dim)]
+        raw_vectors.append(raw_vector)
+        binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist()))
+    return raw_vectors, binary_vectors
+
+
 def gen_invalid_field_types():
     field_types = [
         6,
@@ -116,6 +126,14 @@ def gen_invalid_field_types():
     return field_types
 
 
+def gen_all_type_fields():
+    fields = []
+    for k, v in DataType.__members__.items():
+        field = FieldSchema(name=k.lower(), dtype=v)
+        fields.append(field)
+    return fields
+
+
 def modify_file(file_name_list, input_content=""):
     if not isinstance(file_name_list, list):
         log.error("[modify_file] file is not a list.")
diff --git a/tests20/python_client/common/common_type.py b/tests20/python_client/common/common_type.py
index 17a17135cd..28a9d73b42 100644
--- a/tests20/python_client/common/common_type.py
+++ b/tests20/python_client/common/common_type.py
@@ -21,6 +21,7 @@ default_binary_vec_field_name = "binary_vector"
 default_partition_name = "_default"
 default_tag = "1970_01_01"
 row_count = "row_count"
+default_desc = ""
 default_collection_desc = "default collection"
 default_binary_desc = "default binary collection"
 collection_desc = "collection"
diff --git a/tests20/python_client/testcases/test_collection.py b/tests20/python_client/testcases/test_collection.py
index 1d66dd25cc..73a045a575 100644
--- a/tests20/python_client/testcases/test_collection.py
+++ b/tests20/python_client/testcases/test_collection.py
@@ -1,4 +1,5 @@
 import pytest
+from milvus import DataType
 from pymilvus_orm import FieldSchema
 
 from base.client_request import ApiReq
@@ -68,6 +69,24 @@ class TestCollectionParams(ApiReq):
     def get_invalid_field_type(self, request):
         yield request.param
 
+    @pytest.fixture(
+        scope="function",
+        params=cf.gen_all_type_fields()
+    )
+    def get_unsupported_primary_field(self, request):
+        if request.param.dtype == DataType.INT64:
+            pytest.skip("int64 type is valid primary key")
+        yield request.param
+
+    @pytest.fixture(
+        scope="function",
+        params=ct.get_invalid_strs
+    )
+    def get_invalid_dim(self, request):
+        if request.param == 1:
+            request.param = 0
+        yield request.param
+
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.xfail(reason="issue #5224")
     def test_collection(self):
@@ -107,7 +126,7 @@ class TestCollectionParams(ApiReq):
         assert "invalid" or "illegal" in str(ex)
 
     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.xfail(reason="issue #5231 #5241")
+    @pytest.mark.xfail(reason="issue #5241 #5367")
     def test_collection_dup_name(self):
         """
         target: test collection with dup name
@@ -119,11 +138,28 @@ class TestCollectionParams(ApiReq):
         c_name = collection.name
         assert_default_collection(collection)
         dup_collection, _ = self.collection.collection_init(c_name)
-        assert_default_collection(dup_collection, c_name)
         assert collection.name == dup_collection.name
-        assert collection.name in self.utility.list_collections()
         assert collection.schema == dup_collection.schema
+        assert collection.num_entities == dup_collection.num_entities
         assert id(collection) == id(dup_collection)
+        assert collection.name in self.utility.list_collections()
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #5231")
+    def test_collection_dup_name_with_desc(self):
+        """
+        target: test collection with dup name
+        method: 1. default schema with desc 2. dup name collection
+        expected: desc consistent
+        """
+        self._connect()
+        schema = cf.gen_default_collection_schema(description=ct.collection_desc)
+        collection = self._collection(schema=schema)
+        assert_default_collection(collection, exp_schema=schema)
+        c_name = collection.name
+        dup_collection, _ = self.collection.collection_init(c_name)
+        assert_default_collection(dup_collection, c_name, exp_schema=schema)
+        assert collection.description == dup_collection.description
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_collection_dup_name_new_schema(self):
@@ -195,7 +231,7 @@ class TestCollectionParams(ApiReq):
         assert_default_collection(collection, c_name)
 
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.xfail(reason="issue #5231, #5241")
+    @pytest.mark.xfail(reason="issue #5241 #5367")
     def test_collection_dup_name_same_schema(self):
         """
         target: test collection with dup name and same schema
@@ -203,7 +239,7 @@ class TestCollectionParams(ApiReq):
         expected: two collection object is available
         """
         self._connect()
-        collection = self._collection()
+        collection = self._collection(schema=default_schema)
         c_name = collection.name
         assert_default_collection(collection)
         dup_collection, _ = self.collection.collection_init(c_name, schema=default_schema)
@@ -348,6 +384,49 @@ class TestCollectionParams(ApiReq):
         has, _ = self.utility.has_collection(c_name)
         assert not has
 
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_collection_only_float_vector(self):
+        """
+        target: test collection just with float-vec field
+        method: create with float-vec fields
+        expected: no exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_collection_schema(fields=[cf.gen_float_vec_field()])
+        collection, _ = self.collection.collection_init(c_name, schema=schema)
+        assert_default_collection(collection, c_name, exp_schema=schema)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #5345")
+    def test_collection_multi_float_vectors(self):
+        """
+        target: test collection with multi float vectors
+        method: create collection with two float-vec fields
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        fields = [cf.gen_float_vec_field(), cf.gen_float_vec_field(name="tmp")]
+        schema = cf.gen_collection_schema(fields=fields)
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        log.debug(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #5345")
+    def test_collection_mix_vectors(self):
+        """
+        target: test collection with mix vectors
+        method: create with float and binary vec
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        fields = [cf.gen_float_vec_field(), cf.gen_binary_vec_field()]
+        schema = cf.gen_collection_schema(fields=fields)
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        log.debug(ex)
+
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.xfail(reason="issue #5285")
     def test_collection_without_vectors(self):
@@ -362,6 +441,180 @@ class TestCollectionParams(ApiReq):
         ex, _ = self.collection.collection_init(c_name, schema=schema)
         assert "must" in str(ex)
 
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_collection_primary_field(self):
+        """
+        target: test collection with primary field
+        method: specify primary field
+        expected: collection.primary_field
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_default_collection_schema(primary_field=ct.default_int64_field)
+        collection, _ = self.collection.collection_init(c_name, schema=schema)
+        assert collection.primary_field.name == ct.default_int64_field
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_unsupported_primary_field(self, get_unsupported_primary_field):
+        """
+        target: test collection with unsupported primary field type
+        method: specify non-int64 as primary field
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        field = get_unsupported_primary_field
+        schema = cf.gen_collection_schema(fields=[field], primary_field=field.name)
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        assert "the data type of primary key should be int64" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_multi_primary_fields(self):
+        """
+        target: test collection with multi primary
+        method: collection with two primary fields
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        int_field = cf.gen_int64_field(is_primary=True)
+        float_vec_field = cf.gen_float_vec_field(is_primary=True)
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field])
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        assert "there are more than one primary key" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #5349")
+    def test_collection_primary_inconsistent(self):
+        """
+        target: test collection with different primary field setting
+        method: 1. set A field is_primary 2. set primary_field is B
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        int_field = cf.gen_int64_field(name="int", is_primary=True)
+        float_vec_field = cf.gen_float_vec_field(name="vec")
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field="vec")
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        log.info(schema)
+        log.info(ex.primary_field.name)
+        assert "there are more than one primary key" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_field_primary_false(self):
+        """
+        target: test collection with primary false
+        method: define field with is_primary false
+        expected: no exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        int_field = cf.gen_int64_field(name="int")
+        float_vec_field = cf.gen_float_vec_field()
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field])
+        collection, _ = self.collection.collection_init(c_name, schema=schema)
+        assert collection.primary_field is None
+        assert collection.schema.auto_id
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #5350")
+    def test_collection_field_invalid_primary(self, get_invalid_string):
+        """
+        target: test collection with invalid primary
+        method: define field with is_primary=non-bool
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        int_field = cf.gen_int64_field(name="int", is_primary=get_invalid_string)
+        float_vec_field = cf.gen_float_vec_field()
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field])
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        log.info(str(ex))
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_collection_vector_without_dim(self):
+        """
+        target: test collection without dimension
+        method: define vector field without dim
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        float_vec_field = FieldSchema(name=ct.default_float_vec_field_name, dtype=DataType.FLOAT_VECTOR)
+        schema = cf.gen_collection_schema(fields=[float_vec_field])
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        assert "dimension is not defined in field type params" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_vector_invalid_dim(self, get_invalid_dim):
+        """
+        target: test collection with invalid dimension
+        method: define float-vec field with invalid dimension
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        float_vec_field = cf.gen_float_vec_field(dim=get_invalid_dim)
+        schema = cf.gen_collection_schema(fields=[float_vec_field])
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        assert "dim must be of int" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("dim", [-1, 32769])
+    def test_collection_vector_out_bounds_dim(self, dim):
+        """
+        target: test collection with out of bounds dim
+        method: invalid dim -1 and 32769
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        float_vec_field = cf.gen_float_vec_field(dim=dim)
+        schema = cf.gen_collection_schema(fields=[float_vec_field])
+        ex, _ = self.collection.collection_init(c_name, schema=schema)
+        assert "invalid dimension: {}. should be in range 1 ~ 32768".format(dim) in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_non_vector_field_dim(self):
+        """
+        target: test collection with dim for non-vector field
+        method: define int64 field with dim
+        expected: no exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        int_field = FieldSchema(name="int", dtype=DataType.INT64, dim=ct.default_dim)
+        float_vec_field = cf.gen_float_vec_field()
+        schema = cf.gen_collection_schema(fields=[int_field, float_vec_field])
+        collection, _ = self.collection.collection_init(c_name, schema=schema)
+        assert_default_collection(collection, c_name, exp_schema=schema)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("desc", [None, ct.collection_desc])
+    def test_collection_desc(self, desc):
+        """
+        target: test collection with none and non-empty description
+        method: create with description None and with ct.collection_desc
+        expected: assert default description
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_default_collection_schema(description=desc)
+        collection, _ = self.collection.collection_init(c_name, schema=schema)
+        assert_default_collection(collection, c_name, exp_schema=schema)
+
+    # TODO
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_long_desc(self):
+        """
+        target: test collection with long desc
+        method: create with long desc
+        expected:
+        """
+        pass
+
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.xfail(reason="issue #5302")
     def test_collection_with_dataframe(self):
@@ -400,7 +653,22 @@ class TestCollectionOperation(ApiReq):
     ******************************************************************
     """
 
-    # #5237
+    def teardown_method(self):
+        if self.collection is not None and self.collection.collection is not None:
+            self.collection.drop()
+
+    def setup_method(self):
+        pass
+
+    @pytest.fixture(
+        scope="function",
+        params=ct.get_invalid_strs
+    )
+    def get_non_df(self, request):
+        if request.param is None:
+            pytest.skip("skip None")
+        yield request.param
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_collection_without_connection(self):
         """
@@ -434,6 +702,7 @@ class TestCollectionOperation(ApiReq):
             assert c_name not in self.utility.list_collections()
 
     @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #xxx")
     def test_collection_dup_name_drop(self):
         """
         target: test collection with dup name, and drop
@@ -444,6 +713,7 @@ class TestCollectionOperation(ApiReq):
         self._connect()
         collection = self._collection()
         assert_default_collection(collection)
+        log.info(collection.schema)
         dup_collection, _ = self.collection.collection_init(collection.name)
         assert_default_collection(dup_collection, collection.name)
         dup_collection.drop()
@@ -451,3 +721,102 @@ class TestCollectionOperation(ApiReq):
         assert not has
         with pytest.raises(Exception, match="can't find collection"):
             collection.num_entities
+
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.xfail(reason="issue #5302")
+    def test_collection_schema_insert_dataframe(self):
+        """
+        target: test collection create and insert dataframe
+        method: 1. create by schema 2. insert dataframe
+        expected: assert num_entities
+        """
+        self._connect()
+        nb = ct.default_nb
+        collection = self._collection()
+        assert_default_collection(collection)
+        df = cf.gen_default_dataframe_data(nb)
+        self.collection.insert(data=df)
+        assert collection.num_entities == nb
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue #5302")
+    def test_collection_created_by_dataframe(self):
+        """
+        target: test collection with dataframe
+        method: create collection with dataframe
+        expected: create successfully
+        """
+        self._connect()
+        nb = ct.default_nb
+        c_name = cf.gen_unique_str(prefix)
+        df = cf.gen_default_dataframe_data(nb)
+        schema = cf.gen_default_collection_schema()
+        collection, _ = self.collection.collection_init(name=c_name, data=df)
+        assert_default_collection(collection, exp_name=c_name, exp_num=nb, exp_schema=schema)
+
+    # TODO
+    @pytest.mark.tags(CaseLabel.L0)
+    def _test_collection_created_by_invalid_dataframe(self, get_invalid_df):
+        """
+        target: test create collection by invalid dataframe
+        method: invalid dataframe type create collection
+        expected: raise exception
+        """
+        pass
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_collection_created_by_non_dataframe(self, get_non_df):
+        """
+        target: test create collection by non-dataframe data
+        method: non-dataframe type create collection
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        ex, _ = self.collection.collection_init(name=c_name, schema=None, data=get_non_df)
+        assert "Data of not pandas.DataFrame type should bepassed into the schema" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_collection_created_by_data_list(self):
+        """
+        target: test create collection by data list
+        method: data type is list-like
+        expected: raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        data = cf.gen_default_list_data(nb=100)
+        ex, _ = self.collection.collection_init(name=c_name, schema=None, data=data)
+        assert "Data of not pandas.DataFrame type should bepassed into the schema" in str(ex)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.xfail(reason="issue #5302")
+    def test_collection_schema_insert_data(self):
+        """
+        target: test collection create and insert list-like data
+        method: 1. create by schema 2. insert data
+        expected: assert num_entities
+        """
+        self._connect()
+        nb = ct.default_nb
+        collection = self._collection()
+        assert_default_collection(collection)
+        data = cf.gen_default_list_data(nb)
+        self.collection.insert(data=data)
+        assert collection.num_entities == nb
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_collection_after_drop(self):
+        """
+        target: test create collection after create and drop
+        method: 1. create a 2. drop a 3. re-create a
+        expected: no exception
+        """
+        collection = self._collection()
+        assert_default_collection(collection)
+        c_name = collection.name
+        collection.drop()
+        assert not self.utility.has_collection(c_name)[0]
+        re_collection = self._collection(name=c_name)
+        assert_default_collection(re_collection, c_name)
+        assert self.utility.has_collection(c_name)[0]
\ No newline at end of file