def gen_binary_vectors(num, dim):
    """Generate random bit vectors together with their packed byte form.

    :param num: number of vectors to generate
    :param dim: number of bits (0/1 entries) in each vector
    :return: tuple ``(raw_vectors, binary_vectors)`` where ``raw_vectors`` is a
             list of ``num`` lists holding ``dim`` random 0/1 ints, and
             ``binary_vectors`` holds, in the same order, the ``bytes`` obtained
             by packing each raw vector with ``np.packbits``
    """
    # Draw every bit row first; the random calls happen row by row, bit by bit.
    bit_rows = [[random.randint(0, 1) for _ in range(dim)] for _ in range(num)]
    # Pack each row of bits into bytes along its last (only) axis.
    packed_rows = [bytes(np.packbits(bits, axis=-1).tolist()) for bits in bit_rows]
    return bit_rows, packed_rows
def gen_invalid_dataframe():
    """Build the dataframes used as invalid inputs for collection creation.

    :return: list of four ``pd.DataFrame`` objects, in this order:
             1. a frame that only declares columns and holds no rows
             2. a frame whose single column is named with a blank string
             3. a frame holding a datetime column
             4. a frame whose single column is named with special characters
    """
    sample_vectors = gen_vectors(3, 2)

    # Columns only, no data.
    header_only_df = pd.DataFrame(columns=[ct.default_int64_field, ct.default_float_vec_field_name])
    # Data present, but the column name is blank.
    blank_name_df = pd.DataFrame({' ': sample_vectors})
    # Datetime values.
    datetime_df = pd.DataFrame({"date": pd.date_range('20210101', periods=3)})
    # Column name made of special characters.
    symbol_name_df = pd.DataFrame({'%$#': sample_vectors})

    return [header_only_df, blank_name_df, datetime_df, symbol_name_df]
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="issue #5407")
def test_collection_with_unknown_type(self):
    """
    target: test collection whose schema contains a DataType.UNKNOWN field
    method: build a schema from an UNKNOWN-typed field plus a float-vec field
            and init a collection with it
    expected: raise exception (not asserted yet, the result is only logged)
    """
    self._connect()
    name = cf.gen_unique_str(prefix)
    schema = cf.gen_collection_schema(
        fields=[FieldSchema("unknown", DataType.UNKNOWN), cf.gen_float_vec_field()]
    )
    error, _ = self.collection.collection_init(name, schema=schema)
    log.error(str(error))
    # TODO assert
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("dtype", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR])
def test_collection_vector_without_dim(self, dtype):
    """
    target: test collection without dimension
    method: define a vector field (float or binary, per `dtype`) without dim
    expected: the returned error reports that the dimension is not defined
    """
    self._connect()
    name = cf.gen_unique_str(prefix)
    # The field may be a float or a binary vector, so it gets a neutral name.
    vec_field = FieldSchema(name="vec", dtype=dtype)
    schema = cf.gen_collection_schema(fields=[vec_field])
    error, _ = self.collection.collection_init(name, schema=schema)
    error_text = str(error)
    assert "dimension is not defined in field type params" in error_text
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.xfail(reason="issue #5414")
def test_collection_binary_with_dataframe(self):
    """
    target: test binary collection with dataframe
    method: create binary collection with a dataframe of nb rows
    expected: collection num entities equal to nb
    """
    self._connect()
    nb = ct.default_nb
    c_name = cf.gen_unique_str(prefix)
    # Generate exactly nb rows: the assertion below expects exp_num=nb, so a
    # hard-coded row count would make the check fail for an unrelated reason.
    data = cf.gen_default_binary_dataframe_data(nb)
    log.debug(data)
    collection, _ = self.collection.collection_init(c_name, schema=default_binary_schema, data=data)
    assert_default_collection(collection, c_name, exp_schema=default_binary_schema, exp_num=nb)
@pytest.mark.tags(CaseLabel.L1)
def test_collection_created_by_invalid_dataframe(self, get_invalid_df):
    """
    target: test collection with invalid dataframe
    method: create with invalid dataframe
    expected: raise exception whose text contains one of the known messages
    """
    self._connect()
    c_name = cf.gen_unique_str(prefix)
    ex, _ = self.collection.collection_init(name=c_name, schema=None, data=get_invalid_df)
    expected_messages = (
        "Cannot infer schema from empty dataframe",
        "Field name should not be empty",
        "Invalid field name",
    )
    error_text = str(ex)
    # Each candidate must be tested against the error text individually:
    # `a or b or c in text` evaluates to `a` (a truthy string) and never fails.
    assert any(message in error_text for message in expected_messages), error_text
@pytest.mark.tags(CaseLabel.L0)
def test_collection_binary_created_by_data_list(self):
    """
    target: test create collection from list-like binary data without schema
    method: pass list-like data and schema=None to collection init
    expected: raise exception asking for a schema
    """
    self._connect()
    name = cf.gen_unique_str(prefix)
    list_data = cf.gen_default_binary_list_data(nb=100)
    error, _ = self.collection.collection_init(name=name, schema=None, data=list_data)
    # The message is matched verbatim, including the missing space in
    # "bepassed" (presumably mirrors the library's own text; confirm).
    expected_message = "Data of not pandas.DataFrame type should bepassed into the schema"
    assert expected_message in str(error)
class TestInsertParams(ApiReq):
    """ Test case of Insert interface """

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.xfail(reason="issue #5421")
    def test_collection_numpy_insert_data(self):
        """
        target: test collection create and insert numpy data
        method: 1. create by schema 2. insert data built by gen_numpy_data
        expected: TODO - nothing is asserted yet, the insert result is only
                  logged (see issue #5421)
        """
        self._connect()
        nb = 10
        # Only the creation matters here; the returned object is not used.
        self._collection()
        data = cf.gen_numpy_data(nb)
        ex, _ = self.collection.insert(data=data)
        log.error(str(ex))