From 2977416c9b350a77ce19cbc722e8d57baaea3d00 Mon Sep 17 00:00:00 2001 From: ThreadDao Date: Sat, 26 Jun 2021 13:32:11 +0800 Subject: [PATCH] Update collection and insert test cases (#6129) * add insert cases for index and threading Signed-off-by: ThreadDao * update collection and insert cases Signed-off-by: ThreadDao * update orm version in test Signed-off-by: ThreadDao --- tests/python_test/requirements.txt | 2 +- tests20/python_client/common/common_func.py | 2 + .../testcases/test_collection.py | 21 ++- .../python_client/testcases/test_insert.py | 121 ++++++++++++++---- 4 files changed, 111 insertions(+), 35 deletions(-) diff --git a/tests/python_test/requirements.txt b/tests/python_test/requirements.txt index 9f16daabcb..c342e1e030 100644 --- a/tests/python_test/requirements.txt +++ b/tests/python_test/requirements.txt @@ -12,7 +12,7 @@ pytest-print==0.2.1 pytest-level==0.1.1 pytest-xdist==2.2.1 pytest-parallel -pymilvus-orm==2.0a1.dev51 +pymilvus-orm==2.0a1.dev58 pytest-rerunfailures==9.1.1 git+https://github.com/Projectplace/pytest-tags ndg-httpsclient diff --git a/tests20/python_client/common/common_func.py b/tests20/python_client/common/common_func.py index dc1e890695..5b2bed51e9 100644 --- a/tests20/python_client/common/common_func.py +++ b/tests20/python_client/common/common_func.py @@ -141,6 +141,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0): }) return df + def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0): int64_values = pd.Series(data=[i for i in range(start, start + nb)]) int32_values = pd.Series(data=[np.int32(i) for i in range(start, start + nb)], dtype="int32") @@ -251,6 +252,7 @@ def gen_normal_expressions(): ] return expressions + def jaccard(x, y): x = np.asarray(x, np.bool) y = np.asarray(y, np.bool) diff --git a/tests20/python_client/testcases/test_collection.py b/tests20/python_client/testcases/test_collection.py index 9f3d805de2..62e514ea19 100644 --- a/tests20/python_client/testcases/test_collection.py +++ b/tests20/python_client/testcases/test_collection.py @@ -42,7 +42,7 @@ class TestCollectionParams(TestcaseBase): @pytest.fixture(scope="function", params=ct.get_invalid_strs) def get_invalid_dim(self, request): if request.param == 1: - request.param = 0 + pytest.skip("i is valid dim") yield request.param @pytest.mark.tags(CaseLabel.L0) @@ -253,7 +253,7 @@ class TestCollectionParams(TestcaseBase): """ self._connect() c_name = cf.gen_unique_str(prefix) - error = {ct.err_code: 1, ct.err_msg: "schema type must be schema.CollectionSchema"} + error = {ct.err_code: 0, ct.err_msg: "Schema type must be schema.CollectionSchema"} self.collection_wrap.init_collection(c_name, schema=get_none_removed_invalid_strings, check_task=CheckTasks.err_res, check_items=error) @@ -380,12 +380,11 @@ class TestCollectionParams(TestcaseBase): field_one = cf.gen_int64_field(is_primary=True) field_two = cf.gen_int64_field() schema = cf.gen_collection_schema(fields=[field_one, field_two, cf.gen_float_vec_field()]) - error = {ct.err_code: 0, ct.err_msg: "duplicated field name"} + error = {ct.err_code: 1, ct.err_msg: "duplicated field name"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) assert not self.utility_wrap.has_collection(c_name)[0] @pytest.mark.tags(CaseLabel.L0) - @pytest.mark.skip(reason="waiting for required int primary field") @pytest.mark.parametrize("field", [cf.gen_float_vec_field(), cf.gen_binary_vec_field()]) def test_collection_only_vector_field(self, field): """ @@ -394,7 +393,7 @@ class TestCollectionParams(TestcaseBase): expected: raise exception """ self._connect() - error = {ct.err_code: 0, ct.err_msg: "Must be have a primary key field"} + error = {ct.err_code: 0, ct.err_msg: "Primary field must in dataframe"} self.collection_schema_wrap.init_collection_schema([field], check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -758,7 +757,6 @@ class TestCollectionParams(TestcaseBase): self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.xfail(reason="#5950") def test_collection_vector_invalid_dim(self, get_invalid_dim): """ target: test collection with invalid dimension @@ -769,11 +767,11 @@ class TestCollectionParams(TestcaseBase): c_name = cf.gen_unique_str(prefix) float_vec_field = cf.gen_float_vec_field(dim=get_invalid_dim) schema = cf.gen_collection_schema(fields=[cf.gen_int64_field(is_primary=True), float_vec_field]) - error = {ct.err_code: 0, ct.err_msg: "dim must be of int"} + error = {ct.err_code: 0, ct.err_msg: f'invalid dim: {get_invalid_dim}'} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("dim", [-1, 32769]) + @pytest.mark.parametrize("dim", [-1, 0, 32769]) def test_collection_vector_out_bounds_dim(self, dim): """ target: test collection with out of bounds dim @@ -788,7 +786,6 @@ class TestCollectionParams(TestcaseBase): self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="waiting for primary field") def test_collection_non_vector_field_dim(self): """ target: test collection with dim for non-vector field @@ -797,9 +794,10 @@ class TestCollectionParams(TestcaseBase): """ self._connect() c_name = cf.gen_unique_str(prefix) - int_field, _ = self.field_schema_wrap.init_field_schema(name="int", dtype=DataType.INT64, dim=ct.default_dim) + int_field, _ = self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=DataType.INT64, + dim=ct.default_dim) float_vec_field = cf.gen_float_vec_field() - schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) + schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field=ct.default_int64_field_name) self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={exp_name: c_name, exp_schema: schema}) @@ -1096,7 +1094,6 @@ class TestCollectionDataframe(TestcaseBase): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.xfail(reason="issue #6077") def test_construct_with_none_auto_id(self): """ target: test construct with non-int64 as primary field diff --git a/tests20/python_client/testcases/test_insert.py b/tests20/python_client/testcases/test_insert.py index 74d1f2c731..87723acf0d 100644 --- a/tests20/python_client/testcases/test_insert.py +++ b/tests20/python_client/testcases/test_insert.py @@ -1,9 +1,12 @@ +import threading + import numpy as np import pandas as pd import pytest +from pymilvus_orm import Index from base.client_base import TestcaseBase -# from utils.util_log import test_log as log +from utils.util_log import test_log as log from common import common_func as cf from common import common_type as ct from common.common_type import CaseLabel, CheckTasks @@ -15,6 +18,8 @@ exp_num = "num_entities" exp_primary = "primary" default_schema = cf.gen_default_collection_schema() default_binary_schema = cf.gen_default_binary_collection_schema() +default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} +default_binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} class TestInsertParams(TestcaseBase): @@ -71,7 +76,7 @@ class TestInsertParams(TestcaseBase): """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) - error = {ct.err_code: 1, ct.err_msg: "Datas must be list"} + error = {ct.err_code: 0, ct.err_msg: "Data type is not support"} collection_w.insert(data=get_non_data_type, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) @@ -84,7 +89,7 @@ class TestInsertParams(TestcaseBase): """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) - error = {ct.err_code: 1, ct.err_msg: "Column cnt not match with schema"} + error = {ct.err_code: 0, ct.err_msg: "The data fields number is not match with schema"} collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @@ -426,45 +431,78 @@ class TestInsertOperation(TestcaseBase): collection_list, _ = self.utility_wrap.list_collections() assert collection_w.name not in collection_list + @pytest.mark.tags(CaseLabel.L1) def test_insert_create_index(self): """ target: test insert and create index method: 1. insert 2. create index expected: verify num entities and index """ - pass + collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix)) + df = cf.gen_default_dataframe_data(ct.default_nb) + collection_w.insert(data=df) + assert collection_w.num_entities == ct.default_nb + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + assert collection_w.has_index() + index, _ = collection_w.index() + assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) + assert collection_w.indexes[0] == index + @pytest.mark.tags(CaseLabel.L1) def test_insert_after_create_index(self): """ target: test insert after create index method: 1. create index 2. insert data expected: verify index and num entities """ - pass + collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix)) + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + assert collection_w.has_index() + index, _ = collection_w.index() + assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) + assert collection_w.indexes[0] == index + df = cf.gen_default_dataframe_data(ct.default_nb) + collection_w.insert(data=df) + assert collection_w.num_entities == ct.default_nb + @pytest.mark.tags(CaseLabel.L2) def test_insert_binary_after_index(self): """ target: test insert binary after index method: 1.create index 2.insert binary data expected: 1.index ok 2.num entities correct """ - pass + schema = cf.gen_default_binary_collection_schema() + collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema) + collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params) + assert collection_w.has_index() + index, _ = collection_w.index() + assert index == Index(collection_w.collection, ct.default_binary_vec_field_name, default_binary_index_params) + assert collection_w.indexes[0] == index + df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb) + collection_w.insert(data=df) + assert collection_w.num_entities == ct.default_nb - def test_insert_search(self): + @pytest.mark.tags(CaseLabel.L1) + def test_insert_auto_id_create_index(self): """ - target: test insert and search - method: 1.insert data 2.search - expected: verify search result + target: test create index in auto_id=True collection + method: 1.create auto_id=True collection and insert 2.create index + expected: index correct """ - pass - - def test_insert_binary_search(self): - """ - target: test insert and search - method: 1.insert binary data 2.search - expected: search result correct - """ - pass + schema = cf.gen_default_collection_schema(auto_id=True) + collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema) + df = cf.gen_default_dataframe_data(ct.default_nb) + df.drop(ct.default_int64_field_name, axis=1, inplace=True) + mutation_res, _ = collection_w.insert(data=df) + assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb) + assert collection_w.num_entities == ct.default_nb + # create index + collection_w.create_index(ct.default_float_vec_field_name, default_index_params) + assert collection_w.has_index() + index, _ = collection_w.index() + assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) + assert collection_w.indexes[0] == index @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_true(self): @@ -500,8 +538,8 @@ class TestInsertOperation(TestcaseBase): assert cf._check_primary_keys(primary_keys, nb) mutation_res_1, _ = collection_w.insert(data=df) primary_keys.extend(mutation_res_1.primary_keys) - assert cf._check_primary_keys(primary_keys, nb*2) - assert collection_w.num_entities == nb*2 + assert cf._check_primary_keys(primary_keys, nb * 2) + assert collection_w.num_entities == nb * 2 @pytest.mark.tags(CaseLabel.L0) def test_insert_auto_id_true_list_data(self): @@ -581,12 +619,41 @@ class TestInsertOperation(TestcaseBase): assert mutation_res.primary_keys == data[0] assert collection_w.num_entities == nb + @pytest.mark.tags(CaseLabel.L2) def test_insert_multi_threading(self): """ target: test concurrent insert method: multi threads insert expected: verify num entities """ + collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix)) + df = cf.gen_default_dataframe_data(ct.default_nb) + thread_num = 4 + threads = [] + primary_keys = df[ct.default_int64_field_name].values.tolist() + + def insert(thread_i): + log.debug(f'In thread-{thread_i}') + mutation_res, _ = collection_w.insert(df) + assert mutation_res.insert_count == ct.default_nb + assert mutation_res.primary_keys == primary_keys + + for i in range(thread_num): + x = threading.Thread(target=insert, args=(i, )) + threads.append(x) + x.start() + for t in threads: + t.join() + assert collection_w.num_entities == ct.default_nb * thread_num + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.skip(reason="Currently primary keys are not unique") + def test_insert_multi_threading_auto_id(self): + """ + target: test concurrent insert auto_id=True collection + method: 1.create auto_id=True collection 2.concurrent insert + expected: verify primary keys unique + """ pass @pytest.mark.tags(CaseLabel.L2) @@ -608,6 +675,7 @@ class TestInsertOperation(TestcaseBase): assert collection_w.num_entities == ct.default_nb +@pytest.mark.skip(reason="waiting for MutationFuture") class TestInsertAsync(TestcaseBase): """ ****************************************************************** @@ -621,7 +689,16 @@ class TestInsertAsync(TestcaseBase): method: insert with async=True expected: verify num entities """ - pass + # c_name = cf.gen_unique_str(prefix) + # collection_w = self.init_collection_wrap(name=c_name) + # df = cf.gen_default_dataframe_data(nb=100) + # future, _ = collection_w.insert(data=df, _async=True) + # future.done() + # res = future.result() + # log.debug(res.primary_keys) + # assert mutation_res.insert_count == ct.default_nb + # assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist() + # assert collection_w.num_entities == ct.default_nb def test_insert_async_false(self): """