test: Update init collection method (#35596)

Related issue: #32653

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
This commit is contained in:
yanliang567 2024-08-21 09:22:56 +08:00 committed by GitHub
parent 731d45abbe
commit 7ac339ac64
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 189 additions and 229 deletions

View File

@ -74,10 +74,9 @@ class TestAliasOperation(TestcaseBase):
alias_name = cf.gen_unique_str(prefix)
self.utility_wrap.create_alias(collection_w.name, alias_name)
collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_name,
exp_schema: default_schema})
collection_alias = self.init_collection_wrap(name=alias_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_name, exp_schema: default_schema})
# assert collection is equal to alias according to partitions
assert [p.name for p in collection_w.partitions] == [
p.name for p in collection_alias.partitions]
@ -110,10 +109,9 @@ class TestAliasOperation(TestcaseBase):
alias_a_name = cf.gen_unique_str(prefix)
self.utility_wrap.create_alias(collection_1.name, alias_a_name)
collection_alias_a, _ = self.collection_wrap.init_collection(name=alias_a_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_a_name,
exp_schema: default_schema})
collection_alias_a = self.init_collection_wrap(name=alias_a_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_a_name, exp_schema: default_schema})
# assert collection is equal to alias according to partitions
assert [p.name for p in collection_1.partitions] == [
p.name for p in collection_alias_a.partitions]
@ -132,10 +130,9 @@ class TestAliasOperation(TestcaseBase):
alias_b_name = cf.gen_unique_str(prefix)
self.utility_wrap.create_alias(collection_2.name, alias_b_name)
collection_alias_b, _ = self.collection_wrap.init_collection(name=alias_b_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_b_name,
exp_schema: default_schema})
collection_alias_b = self.init_collection_wrap(name=alias_b_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_b_name, exp_schema: default_schema})
# assert collection is equal to alias according to partitions
assert [p.name for p in collection_2.partitions] == [
p.name for p in collection_alias_b.partitions]
@ -177,10 +174,9 @@ class TestAliasOperation(TestcaseBase):
alias_name = cf.gen_unique_str(prefix)
self.utility_wrap.create_alias(collection_w.name, alias_name)
# collection_w.create_alias(alias_name)
collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_name,
exp_schema: default_schema})
collection_alias = self.init_collection_wrap(name=alias_name,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: alias_name, exp_schema: default_schema})
# assert collection is equal to alias according to partitions
assert [p.name for p in collection_w.partitions] == [
p.name for p in collection_alias.partitions]
@ -406,7 +402,7 @@ class TestAliasOperation(TestcaseBase):
"""
self._connect()
c_name = cf.gen_unique_str("collection")
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
alias_name = cf.gen_unique_str(prefix)
self.utility_wrap.create_alias(collection_w.name, alias_name)
collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
@ -414,7 +410,7 @@ class TestAliasOperation(TestcaseBase):
check_items={exp_name: alias_name,
exp_schema: default_schema})
collection_alias.set_properties({'mmap.enabled': True})
pro = collection_w.describe().get("properties")
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
collection_w.set_properties({'mmap.enabled': False})
pro = collection_alias.describe().get("properties")

View File

@ -616,7 +616,7 @@ class TestCollectionParams(TestcaseBase):
int_field = cf.gen_int64_field(is_primary=True, auto_id=auto_id)
vec_field = cf.gen_float_vec_field(name='vec')
schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=not auto_id)
collection_w = self.collection_wrap.init_collection(cf.gen_unique_str(prefix), schema=schema)[0]
collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), schema=schema)
assert collection_w.schema.auto_id is auto_id

View File

@ -923,23 +923,24 @@ class TestNewIndexBase(TestcaseBase):
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name=ct.default_index_name)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
assert collection_w.index().params["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.load()
collection_w.release()
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': False})
collection_w.load()
assert collection_w.index().params["mmap.enabled"] == 'False'
assert collection_w.index()[0].params["mmap.enabled"] == 'False'
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp)
collection_w.release()
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
assert collection_w.index().params["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.load()
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
@ -957,12 +958,11 @@ class TestNewIndexBase(TestcaseBase):
expected: search success
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema())
collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False)[0]
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
collection_w.create_index(field_name, default_index, index_name=f"mmap_index_{index}")
collection_w.alter_index(f"mmap_index_{index}", {'mmap.enabled': True})
assert collection_w.index().params["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.drop_index(index_name=f"mmap_index_{index}")
collection_w.create_index(field_name, default_index, index_name=f"index_{index}")
collection_w.load()
@ -983,21 +983,21 @@ class TestNewIndexBase(TestcaseBase):
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
collection_w = self.init_collection_general(c_name, insert_data=True, is_index=False)[0]
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
collection_w.set_properties({'mmap.enabled': True})
pro = collection_w.describe().get("properties")
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
assert collection_w.index().params["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.insert(cf.gen_default_list_data())
collection_w.flush()
# check if mmap works after rebuild index
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
assert collection_w.index().params["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
collection_w.load()
collection_w.release()
@ -1005,8 +1005,8 @@ class TestNewIndexBase(TestcaseBase):
# check if mmap works after reloading and rebuilding index.
collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
index_name=ct.default_index_name)
assert collection_w.index().params["mmap.enabled"] == 'True'
pro = collection_w.describe().get("properties")
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
collection_w.load()

View File

@ -27,7 +27,7 @@ class TestIssues(TestcaseBase):
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
auto_id=False, partition_key_field=par_key_field)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, num_partitions=9)
collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)
# insert
nb = 500
@ -61,7 +61,7 @@ class TestIssues(TestcaseBase):
seeds = 200
rand_ids = random.sample(range(0, num_entities), seeds)
rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))]
res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field])
res, _ = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field])
# verify that every random id exists
assert len(res) == len(rand_ids)
@ -69,8 +69,8 @@ class TestIssues(TestcaseBase):
for i in range(len(res)):
pk = res[i].get("pk")
parkey_value = res[i].get(par_key_field)
res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'",
output_fields=["pk", par_key_field])
res_parkey, _ = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'",
output_fields=["pk", par_key_field])
if len(res_parkey) != 1:
log.info(f"dirty data found: pk {pk} with parkey {parkey_value}")
dirty_count += 1

View File

@ -24,7 +24,7 @@ class TestPartitionKeyParams(TestcaseBase):
vector_field = cf.gen_float_vec_field()
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
assert len(collection_w.partitions) == ct.default_partition_num
# insert
@ -53,23 +53,24 @@ class TestPartitionKeyParams(TestcaseBase):
expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
check_items={"nq": nq, "limit": entities_per_parkey})[0]
# search with partition key filter only or with non partition key
res2 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{int64_field.name} in [1,3,5]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
check_items={"nq": nq, "limit": entities_per_parkey})[0]
# search with partition key filter only or with non partition key
res3 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
check_items={"nq": nq, "limit": entities_per_parkey})[0]
# assert the results persist
assert res1.ids == res2.ids == res3.ids
for i in range(nq):
assert res1[i].ids == res2[i].ids == res3[i].ids
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
@ -89,14 +90,14 @@ class TestPartitionKeyParams(TestcaseBase):
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
auto_id=False, partition_key_field=par_key_field)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, num_partitions=9)
collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)
# insert
nb = 1000
string_prefix = cf.gen_str_by_length(length=6)
entities_per_parkey = 20
for n in range(entities_per_parkey):
pk_values = [str(i) for i in range(n * nb, (n+1)*nb)]
pk_values = [str(i) for i in range(n * nb, (n + 1) * nb)]
int64_values = [i for i in range(0, nb)]
string_values = [string_prefix + str(i) for i in range(0, nb)]
float_vec_values = gen_vectors(nb, ct.default_dim)
@ -120,7 +121,7 @@ class TestPartitionKeyParams(TestcaseBase):
expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
check_items={"nq": nq, "limit": entities_per_parkey})[0]
@pytest.mark.tags(CaseLabel.L1)
def test_partition_key_off_in_field_but_enable_in_schema(self):
@ -139,8 +140,7 @@ class TestPartitionKeyParams(TestcaseBase):
err_msg = "fail to create collection"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=10)
collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=10)
assert len(collection_w.partitions) == 10
@pytest.mark.skip("need more investigation")
@ -152,44 +152,7 @@ class TestPartitionKeyParams(TestcaseBase):
2. bulk insert data
3. verify the data bulk inserted and be searched successfully
"""
self._connect()
pk_field = cf.gen_int64_field(name='pk', is_primary=True)
int64_field = cf.gen_int64_field()
string_field = cf.gen_string_field(is_partition_key=True)
vector_field = cf.gen_float_vec_field()
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
auto_id=True)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=10)
# bulk insert
nb = 1000
string_prefix = cf.gen_str_by_length(length=6)
entities_per_parkey = 20
for n in range(entities_per_parkey):
pk_values = [str(i) for i in range(n * nb, (n+1)*nb)]
int64_values = [i for i in range(0, nb)]
string_values = [string_prefix + str(i) for i in range(0, nb)]
float_vec_values = gen_vectors(nb, ct.default_dim)
data = [pk_values, int64_values, string_values, float_vec_values]
collection_w.insert(data)
# flush
collection_w.flush()
# build index
collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
# load
collection_w.load()
# search
nq = 10
search_vectors = gen_vectors(nq, ct.default_dim)
# search with mixed filtered
res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
pass
class TestPartitionKeyInvalidParams(TestcaseBase):
@ -212,8 +175,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
auto_id=True)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=max_partition)
collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=max_partition)
assert len(collection_w.partitions) == max_partition
# insert
@ -233,10 +195,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
num_partitions = max_partition + 1
err_msg = f"partition number ({num_partitions}) exceeds max configuration ({max_partition})"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=num_partitions,
check_task=CheckTasks.err_res,
check_items={"err_code": 1100, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions,
check_task=CheckTasks.err_res,
check_items={"err_code": 1100, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L1)
def test_min_partitions(self):
@ -257,8 +218,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=int64_field.name)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=min_partition)
collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition)
assert len(collection_w.partitions) == min_partition
# insert
@ -279,14 +239,12 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
# create a collection with min partitions - 1
err_msg = "The specified num_partitions should be greater than or equal to 1"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=min_partition - 1,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=min_partition - 3,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 1,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 3,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("is_par_key", [None, "", "invalid", 0.1, [], {}, ()])
@ -298,9 +256,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
"""
self._connect()
err_msg = "Param is_partition_key must be bool type"
int64_field = cf.gen_int64_field(is_partition_key=is_par_key,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_int64_field(is_partition_key=is_par_key,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("num_partitions", [True, False, "", "invalid", 0.1, [], {}, ()])
@ -319,10 +277,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
err_msg = "invalid num_partitions type"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=num_partitions,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L0)
def test_partition_key_on_multi_fields(self):
@ -338,30 +295,30 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
string_field = cf.gen_string_field(is_partition_key=True)
vector_field = cf.gen_float_vec_field()
err_msg = "Expected only one partition key field"
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# both defined in collection schema
err_msg = "Param partition_key_field must be str type"
int64_field = cf.gen_int64_field()
string_field = cf.gen_string_field()
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=[int64_field.name, string_field.name],
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=[int64_field.name, string_field.name],
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# one defined in field schema, one defined in collection schema
err_msg = "Expected only one partition key field"
int64_field = cf.gen_int64_field(is_partition_key=True)
string_field = cf.gen_string_field()
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=string_field.name,
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=string_field.name,
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("is_int64_primary", [True, False])
@ -384,9 +341,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
err_msg = "the partition key field must not be primary field"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# if settings on collection schema
if is_int64_primary:
@ -399,9 +356,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
err_msg = "the partition key field must not be primary field"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L0)
def test_partition_key_on_and_off(self):
@ -416,21 +373,21 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
string_field = cf.gen_string_field()
vector_field = cf.gen_float_vec_field()
err_msg = "Expected only one partition key field"
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=vector_field.name,
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field=vector_field.name,
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# if two fields with same type
string_field = cf.gen_string_field(name="string1", is_partition_key=True)
string_field2 = cf.gen_string_field(name="string2")
err_msg = "Expected only one partition key field"
schema = cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field],
partition_key_field=string_field2.name,
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field],
partition_key_field=string_field2.name,
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("field_type", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR, DataType.FLOAT,
@ -458,12 +415,12 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
vector_field = cf.gen_binary_vec_field(is_partition_key=(field_type == DataType.BINARY_VECTOR))
err_msg = "Partition key field type must be DataType.INT64 or DataType.VARCHAR"
schema = cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field,
bool_field, float_field, double_field, json_field,
int64_field, string_field, vector_field],
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field,
bool_field, float_field, double_field, json_field,
int64_field, string_field, vector_field],
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L1)
def test_partition_key_on_not_existed_fields(self):
@ -478,11 +435,11 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
string_field = cf.gen_string_field()
vector_field = cf.gen_float_vec_field()
err_msg = "the specified partition key field {non_existing_field} not exist"
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field="non_existing_field",
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field="non_existing_field",
auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L1)
def test_partition_key_on_empty_and_num_partitions_set(self):
@ -497,18 +454,17 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
string_field = cf.gen_string_field()
vector_field = cf.gen_float_vec_field()
err_msg = "the specified partition key field {} not exist"
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field="", auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
partition_key_field="", auto_id=True,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
schema = cf.gen_default_collection_schema()
err_msg = "num_partitions should only be specified with partition key field enabled"
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
num_partitions=200,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_collection_wrap(name=c_name, schema=schema, num_partitions=200,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_data", [99, True, None, [], {}, ()])
@ -528,7 +484,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
partition_key_field=string_field.name, auto_id=False)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
# insert
nb = 10
@ -541,7 +497,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
data = [pk_values, int64_values, string_values, float_vec_values]
err_msg = "expect string input"
self.collection_wrap.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
collection_w.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
class TestPartitionApiForbidden(TestcaseBase):
@ -564,23 +520,23 @@ class TestPartitionApiForbidden(TestcaseBase):
vector_field = cf.gen_float_vec_field()
schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True)
c_name = cf.gen_unique_str("par_key")
collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
# create partition
err_msg = "disable create partition if partition key mode is used"
partition_name = cf.gen_unique_str("partition")
self.collection_wrap.create_partition(partition_name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.partition_wrap.init_partition(collection_w, partition_name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
collection_w.create_partition(partition_name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.init_partition_wrap(collection_w, partition_name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# get partition is allowed
partitions = self.collection_wrap.partitions
partitions = collection_w.partitions
collection_w.partition(partitions[0].name)
self.partition_wrap.init_partition(collection_w, partitions[0].name)
assert self.partition_wrap.name == partitions[0].name
partition_w = self.init_partition_wrap(collection_w, partitions[0].name)
assert partition_w.name == partitions[0].name
# has partition is allowed
assert collection_w.has_partition(partitions[0].name)
assert self.utility_wrap.has_partition(collection_w.name, partitions[0].name)
@ -594,21 +550,21 @@ class TestPartitionApiForbidden(TestcaseBase):
string_values = [string_prefix + str(i) for i in range(0, nb)]
float_vec_values = gen_vectors(nb, ct.default_dim)
data = [int64_values, string_values, float_vec_values]
self.collection_wrap.insert(data)
collection_w.insert(data)
err_msg = "not support manually specifying the partition names if partition key mode is used"
self.partition_wrap.insert(data, check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.collection_wrap.insert(data, partition_name=partitions[0].name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
partition_w.insert(data, check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
collection_w.insert(data, partition_name=partitions[0].name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
err_msg = "disable load partitions if partition key mode is used"
self.partition_wrap.load(check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
self.collection_wrap.load(partition_names=[partitions[0].name],
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
partition_w.load(check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
collection_w.load(partition_names=[partitions[0].name],
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# flush
collection_w.flush()
@ -621,26 +577,26 @@ class TestPartitionApiForbidden(TestcaseBase):
nq = 10
search_vectors = gen_vectors(nq, ct.default_dim)
# search with mixed filtered
res1 = self.collection_wrap.search(data=search_vectors, anns_field=vector_field.name,
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "limit": ct.default_limit})[0]
pks = res1[0].ids[:3]
err_msg = "not support manually specifying the partition names if partition key mode is used"
self.collection_wrap.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name],
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{int64_field.name} in [1,3,5]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.err_res,
check_items={"err_code": nq, "err_msg": err_msg})
self.partition_wrap.search(data=search_vectors, anns_field=vector_field.name,
params=ct.default_search_params, limit=entities_per_parkey,
expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.err_res,
check_items={"err_code": nq, "err_msg": err_msg})
collection_w.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name],
param=ct.default_search_params, limit=entities_per_parkey,
expr=f'{int64_field.name} in [1,3,5]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.err_res,
check_items={"err_code": nq, "err_msg": err_msg})
partition_w.search(data=search_vectors, anns_field=vector_field.name,
params=ct.default_search_params, limit=entities_per_parkey,
expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
output_fields=[int64_field.name, string_field.name],
check_task=CheckTasks.err_res,
check_items={"err_code": nq, "err_msg": err_msg})
# partition loading progress is allowed
self.utility_wrap.loading_progress(collection_name=collection_w.name)
@ -652,18 +608,22 @@ class TestPartitionApiForbidden(TestcaseBase):
self.utility_wrap.wait_for_loading_complete(collection_name=collection_w.name,
partition_names=[partitions[0].name])
# partition flush is allowed: #24165
self.partition_wrap.flush()
partition_w.flush()
# partition delete is not allowed
self.partition_wrap.delete(expr=f'{pk_field.name} in {pks}',
check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
self.collection_wrap.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name,
check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
partition_w.delete(expr=f'{pk_field.name} in {pks}',
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
collection_w.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name,
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# partition query is not allowed
self.partition_wrap.query(expr=f'{pk_field.name} in {pks}',
check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
self.collection_wrap.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name],
check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
partition_w.query(expr=f'{pk_field.name} in {pks}',
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
collection_w.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name],
check_task=CheckTasks.err_res,
check_items={"err_code": 2, "err_msg": err_msg})
# partition upsert is not allowed
# self.partition_wrap.upsert(data=data, check_task=CheckTasks.err_res,
# check_items={"err_code": 2, "err_msg": err_msg})
@ -671,10 +631,10 @@ class TestPartitionApiForbidden(TestcaseBase):
# check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
# partition release
err_msg = "disable release partitions if partition key mode is used"
self.partition_wrap.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
partition_w.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
# partition drop
err_msg = "disable drop partition if partition key mode is used"
self.partition_wrap.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
partition_w.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
# # partition bulk insert
# self.utility_wrap.do_bulk_insert(collection_w.name, files, partition_names=[partitions[0].name],

View File

@ -3853,7 +3853,7 @@ class TestQueryCount(TestcaseBase):
self._connect()
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_default_sparse_schema()
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=schema)
collection_w = self.init_collection_wrap(c_name, schema=schema)
data = cf.gen_default_list_sparse_data()
collection_w.insert(data)
params = cf.get_index_params_params(index)

View File

@ -4676,9 +4676,9 @@ class TestCollectionSearch(TestcaseBase):
self._connect()
c_name = cf.gen_unique_str(prefix)
binary_schema = cf.gen_default_binary_collection_schema(dim=dim)
self.collection_wrap.init_collection(c_name, schema=binary_schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 65535, "err_msg": f"invalid dimension {dim}."})
self.init_collection_wrap(c_name, schema=binary_schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 999, "err_msg": f"invalid dimension: {dim}."})
class TestSearchBase(TestcaseBase):
@ -5175,8 +5175,9 @@ class TestSearchBase(TestcaseBase):
expected: search success
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema())
nb = 2000
dim = 32
collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0]
params = cf.get_index_params_params(index)
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
collection_w.create_index(field_name, default_index, index_name="mmap_index")
@ -5185,13 +5186,18 @@ class TestSearchBase(TestcaseBase):
# search
collection_w.load()
search_params = cf.gen_search_param(index)[0]
vector = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
collection_w.search(vector, default_search_field, search_params, ct.default_limit)
vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
collection_w.search(vector, default_search_field, search_params, ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": ct.default_limit})
# disable mmap
collection_w.release()
collection_w.alter_index("mmap_index", {'mmap.enabled': False})
collection_w.load()
collection_w.search(vector, default_search_field, search_params, ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": ct.default_limit})
@ -5206,29 +5212,27 @@ class TestSearchBase(TestcaseBase):
"""
self._connect()
dim = 64
c_name = cf.gen_unique_str(prefix)
default_schema = cf.gen_default_binary_collection_schema(auto_id=False, dim=dim,
primary_field=ct.default_int64_field_name)
collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
nb = 2000
collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0]
params = cf.get_index_params_params(index)
default_index = {"index_type": index,
"params": params, "metric_type": "JACCARD"}
collection_w.create_index("binary_vector", default_index, index_name="binary_idx_name")
collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name")
collection_w.alter_index("binary_idx_name", {'mmap.enabled': True})
collection_w.set_properties({'mmap.enabled': True})
collection_w.load()
pro = collection_w.describe().get("properties")
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
assert collection_w.index().params["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
# search
binary_vectors = cf.gen_binary_vectors(3000, dim)[1]
binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1]
search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
output_fields = [default_string_field_name]
collection_w.search(binary_vectors[:default_nq], "binary_vector", search_params,
output_fields = ["*"]
collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params,
default_limit, default_search_string_exp, output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"limit": ct.default_top_k})
check_items={"nq": default_nq,
"limit": default_limit})
class TestSearchDSL(TestcaseBase):