test:[cherry-pick] Add upsert with autoid and update init collection method (#35624)
Related issue: https://github.com/milvus-io/milvus/issues/32653
pr: #35596
---------
Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
This commit is contained in:
parent e2542a1bf5
commit d7d4525c2e
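The change under test: upsert is now supported on collections created with auto_id=True. On such collections Milvus treats an upsert as delete-plus-insert and always assigns fresh auto-generated primary keys, so the keys passed in are discarded and the returned keys differ. A minimal sketch of that behavior with the pymilvus 2.4.x ORM (connection details, collection name, and dimension are illustrative):

# Sketch: upserting into an auto_id collection regenerates the primary keys.
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType
import random

connections.connect(host="localhost", port="19530")   # assumed local Milvus
fields = [FieldSchema("pk", DataType.INT64, is_primary=True, auto_id=True),
          FieldSchema("vec", DataType.FLOAT_VECTOR, dim=8)]
coll = Collection("upsert_autoid_demo", CollectionSchema(fields))  # hypothetical name
vectors = [[random.random() for _ in range(8)] for _ in range(10)]
res1 = coll.upsert([list(range(10)), vectors])          # given pks are ignored, new ones assigned
res2 = coll.upsert([list(res1.primary_keys), vectors])  # upsert the rows just written
# the old rows are deleted and brand-new auto ids come back
assert set(res2.primary_keys).isdisjoint(res1.primary_keys)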
@@ -336,10 +336,10 @@ class ApiCollectionWrapper:
         return res, check_result

     @trace()
-    def compact(self, timeout=None, check_task=None, check_items=None, **kwargs):
+    def compact(self, is_clustering=False, timeout=None, check_task=None, check_items=None, **kwargs):
         timeout = TIMEOUT if timeout is None else timeout
         func_name = sys._getframe().f_code.co_name
-        res, check = api_request([self.collection.compact, timeout], **kwargs)
+        res, check = api_request([self.collection.compact, is_clustering, timeout], **kwargs)
         check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
         return res, check_result

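For context: api_request in this wrapper layer takes a list whose first element is the callable and whose remaining elements are its positional arguments, which is why is_clustering is spliced in ahead of timeout above. Roughly, as a sketch rather than the framework's exact code:

# Assumed shape of the api_request dispatcher used above (sketch):
def api_request(arr, **kwargs):
    func, *args = arr             # arr = [callable, positional args...]
    return func(*args, **kwargs)  # the real helper also logs and traps exceptions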
@@ -12,8 +12,8 @@ allure-pytest==2.7.0
 pytest-print==0.2.1
 pytest-level==0.1.1
 pytest-xdist==2.5.0
-pymilvus==2.4.5rc11
-pymilvus[bulk_writer]==2.4.5rc11
+pymilvus==2.4.6rc4
+pymilvus[bulk_writer]==2.4.6rc4
 pytest-rerunfailures==9.1.1
 git+https://github.com/Projectplace/pytest-tags
 ndg-httpsclient

@@ -74,10 +74,9 @@ class TestAliasOperation(TestcaseBase):

         alias_name = cf.gen_unique_str(prefix)
         self.utility_wrap.create_alias(collection_w.name, alias_name)
-        collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
-                                                                   check_task=CheckTasks.check_collection_property,
-                                                                   check_items={exp_name: alias_name,
-                                                                                exp_schema: default_schema})
+        collection_alias = self.init_collection_wrap(name=alias_name,
+                                                     check_task=CheckTasks.check_collection_property,
+                                                     check_items={exp_name: alias_name, exp_schema: default_schema})
         # assert collection is equal to alias according to partitions
         assert [p.name for p in collection_w.partitions] == [
             p.name for p in collection_alias.partitions]
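The recurring edit in the test files below swaps collection_w, _ = self.collection_wrap.init_collection(...), which returns a (result, check_result) tuple, for the TestcaseBase.init_collection_wrap(...) helper, which returns just the initialized wrapper, so the ", _" unpacking disappears. The helper is assumed to look roughly like this (a sketch, not the repo's exact code):

# Sketch of the assumed base-class helper:
def init_collection_wrap(self, name=None, schema=None, **kwargs):
    name = name if name is not None else cf.gen_unique_str("coll")
    collection_w = ApiCollectionWrapper()
    collection_w.init_collection(name=name, schema=schema, **kwargs)  # (res, check) discarded
    return collection_w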
@@ -110,10 +109,9 @@ class TestAliasOperation(TestcaseBase):

         alias_a_name = cf.gen_unique_str(prefix)
         self.utility_wrap.create_alias(collection_1.name, alias_a_name)
-        collection_alias_a, _ = self.collection_wrap.init_collection(name=alias_a_name,
-                                                                     check_task=CheckTasks.check_collection_property,
-                                                                     check_items={exp_name: alias_a_name,
-                                                                                  exp_schema: default_schema})
+        collection_alias_a = self.init_collection_wrap(name=alias_a_name,
+                                                       check_task=CheckTasks.check_collection_property,
+                                                       check_items={exp_name: alias_a_name, exp_schema: default_schema})
         # assert collection is equal to alias according to partitions
         assert [p.name for p in collection_1.partitions] == [
             p.name for p in collection_alias_a.partitions]
@@ -132,10 +130,9 @@ class TestAliasOperation(TestcaseBase):

         alias_b_name = cf.gen_unique_str(prefix)
         self.utility_wrap.create_alias(collection_2.name, alias_b_name)
-        collection_alias_b, _ = self.collection_wrap.init_collection(name=alias_b_name,
-                                                                     check_task=CheckTasks.check_collection_property,
-                                                                     check_items={exp_name: alias_b_name,
-                                                                                  exp_schema: default_schema})
+        collection_alias_b = self.init_collection_wrap(name=alias_b_name,
+                                                       check_task=CheckTasks.check_collection_property,
+                                                       check_items={exp_name: alias_b_name, exp_schema: default_schema})
         # assert collection is equal to alias according to partitions
         assert [p.name for p in collection_2.partitions] == [
             p.name for p in collection_alias_b.partitions]
@@ -177,10 +174,9 @@ class TestAliasOperation(TestcaseBase):
         alias_name = cf.gen_unique_str(prefix)
         self.utility_wrap.create_alias(collection_w.name, alias_name)
         # collection_w.create_alias(alias_name)
-        collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
-                                                                   check_task=CheckTasks.check_collection_property,
-                                                                   check_items={exp_name: alias_name,
-                                                                                exp_schema: default_schema})
+        collection_alias = self.init_collection_wrap(name=alias_name,
+                                                     check_task=CheckTasks.check_collection_property,
+                                                     check_items={exp_name: alias_name, exp_schema: default_schema})
         # assert collection is equal to alias according to partitions
         assert [p.name for p in collection_w.partitions] == [
             p.name for p in collection_alias.partitions]
@@ -406,7 +402,7 @@ class TestAliasOperation(TestcaseBase):
         """
         self._connect()
         c_name = cf.gen_unique_str("collection")
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
+        collection_w = self.init_collection_wrap(c_name, schema=default_schema)
         alias_name = cf.gen_unique_str(prefix)
         self.utility_wrap.create_alias(collection_w.name, alias_name)
         collection_alias, _ = self.collection_wrap.init_collection(name=alias_name,
@@ -414,7 +410,7 @@ class TestAliasOperation(TestcaseBase):
                                                                    check_items={exp_name: alias_name,
                                                                                 exp_schema: default_schema})
         collection_alias.set_properties({'mmap.enabled': True})
-        pro = collection_w.describe().get("properties")
+        pro = collection_w.describe()[0].get("properties")
         assert pro["mmap.enabled"] == 'True'
         collection_w.set_properties({'mmap.enabled': False})
         pro = collection_alias.describe().get("properties")
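The same tuple convention explains the "[0]" indexing added throughout this commit: wrapper methods such as describe() and index() return (result, check_result), so the payload must be unpacked before inspection, e.g.:

# Wrapper calls return (result, check_result); take [0] before reading the payload:
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'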
@@ -616,7 +616,7 @@ class TestCollectionParams(TestcaseBase):
         int_field = cf.gen_int64_field(is_primary=True, auto_id=auto_id)
         vec_field = cf.gen_float_vec_field(name='vec')
         schema, _ = self.collection_schema_wrap.init_collection_schema([int_field, vec_field], auto_id=not auto_id)
-        collection_w = self.collection_wrap.init_collection(cf.gen_unique_str(prefix), schema=schema)[0]
+        collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), schema=schema)

         assert collection_w.schema.auto_id is auto_id

@@ -923,24 +923,24 @@ class TestNewIndexBase(TestcaseBase):
         """
         self._connect()
         c_name = cf.gen_unique_str(prefix)
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
+        collection_w = self.init_collection_wrap(c_name, schema=default_schema)
         collection_w.insert(cf.gen_default_list_data())
         collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
                                   index_name=ct.default_index_name)
         collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
-        assert collection_w.index().params["mmap.enabled"] == 'True'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
         collection_w.load()
         collection_w.release()
         collection_w.alter_index(ct.default_index_name, {'mmap.enabled': False})
         collection_w.load()
-        assert collection_w.index().params["mmap.enabled"] == 'False'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'False'
         vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
                             default_search_exp)
         collection_w.release()
         collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
-        assert collection_w.index().params["mmap.enabled"] == 'True'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
         collection_w.load()
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
@@ -958,12 +958,11 @@ class TestNewIndexBase(TestcaseBase):
         expected: search success
         """
         self._connect()
-        c_name = cf.gen_unique_str(prefix)
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema())
+        collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False)[0]
         default_index = {"index_type": index, "params": params, "metric_type": "L2"}
         collection_w.create_index(field_name, default_index, index_name=f"mmap_index_{index}")
         collection_w.alter_index(f"mmap_index_{index}", {'mmap.enabled': True})
-        assert collection_w.index().params["mmap.enabled"] == 'True'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
         collection_w.drop_index(index_name=f"mmap_index_{index}")
         collection_w.create_index(field_name, default_index, index_name=f"index_{index}")
         collection_w.load()
@@ -984,21 +983,21 @@ class TestNewIndexBase(TestcaseBase):
         """
         self._connect()
         c_name = cf.gen_unique_str(prefix)
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
+        collection_w = self.init_collection_general(c_name, insert_data=True, is_index=False)[0]
         collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
                                   index_name=ct.default_index_name)
         collection_w.set_properties({'mmap.enabled': True})
-        pro = collection_w.describe().get("properties")
+        pro = collection_w.describe()[0].get("properties")
         assert pro["mmap.enabled"] == 'True'
         collection_w.alter_index(ct.default_index_name, {'mmap.enabled': True})
-        assert collection_w.index().params["mmap.enabled"] == 'True'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
         collection_w.insert(cf.gen_default_list_data())
         collection_w.flush()

         # check if mmap works after rebuild index
         collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
                                   index_name=ct.default_index_name)
-        assert collection_w.index().params["mmap.enabled"] == 'True'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'

         collection_w.load()
         collection_w.release()
@@ -1006,8 +1005,8 @@ class TestNewIndexBase(TestcaseBase):
         # check if mmap works after reloading and rebuilding index.
         collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
                                   index_name=ct.default_index_name)
-        assert collection_w.index().params["mmap.enabled"] == 'True'
-        pro = collection_w.describe().get("properties")
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
+        pro = collection_w.describe()[0].get("properties")
         assert pro["mmap.enabled"] == 'True'

         collection_w.load()
@@ -1305,7 +1304,7 @@ class TestIndexInvalid(TestcaseBase):
         """
         target: test create scalar index on array field
         method: 1.create collection, and create index
-        expected: Raise exception
+        expected: supported create inverted index on array since 2.4.x
         """
         # 1. create a collection
         schema = cf.gen_array_collection_schema()
@@ -1313,6 +1312,9 @@ class TestIndexInvalid(TestcaseBase):
         # 2. create index
         scalar_index_params = {"index_type": "INVERTED"}
         collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params)
+        res, _ = self.utility_wrap.index_building_progress(collection_w.name, ct.default_int32_array_field_name)
+        exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'}
+        assert res == exp_res

     @pytest.mark.tags(CaseLabel.L1)
     def test_create_inverted_index_no_vector_index(self):
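The assertion added above checks build progress for the new INVERTED index on an array field; with no data inserted, every row counter is zero and the state is already 'Finished'. A sketch of the same check outside the wrapper layer (pymilvus utility API; the field name doubles as the default index name, and the names here are assumed):

# Sketch: querying build progress for a named index (pymilvus 2.4.x):
from pymilvus import utility
progress = utility.index_building_progress("my_collection", index_name="int32_array")
print(progress)  # e.g. {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, ...}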
@@ -513,7 +513,7 @@ class TestInsertOperation(TestcaseBase):
         data = [vectors, ["limit_1___________",
                           "limit_2___________"], ['1', '2']]
         error = {ct.err_code: 999,
-                 ct.err_msg: "invalid input, length of string exceeds max length"}
+                 ct.err_msg: "length of string exceeds max length"}
         collection_w.insert(
             data, check_task=CheckTasks.err_res, check_items=error)

@@ -815,16 +815,6 @@ class TestInsertOperation(TestcaseBase):
             t.join()
         assert collection_w.num_entities == ct.default_nb * thread_num

-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="Currently primary keys are not unique")
-    def test_insert_multi_threading_auto_id(self):
-        """
-        target: test concurrent insert auto_id=True collection
-        method: 1.create auto_id=True collection 2.concurrent insert
-        expected: verify primary keys unique
-        """
-        pass
-
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_multi_times(self, dim):
         """
@@ -1211,11 +1201,11 @@ class TestInsertInvalid(TestcaseBase):
                             check_items=error)

     @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_invalid_with_pk_varchar_auto_id_true(self):
+    def test_insert_with_pk_varchar_auto_id_true(self):
         """
         target: test insert invalid with pk varchar and auto id true
         method: set pk varchar max length < 18, insert data
-        expected: raise exception
+        expected: varchar pk supports auto_id=true
         """
         string_field = cf.gen_string_field(is_primary=True, max_length=6)
         embedding_field = cf.gen_float_vec_field()
@@ -1547,8 +1537,56 @@ class TestUpsertValid(TestcaseBase):
         res = collection_w.query(exp, output_fields=[default_float_name])[0]
         assert [res[i][default_float_name] for i in range(upsert_nb)] == float_values.to_list()

-    @pytest.mark.tags(CaseLabel.L2)
-    def test_upsert_with_primary_key_string(self):
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_upsert_with_auto_id(self):
+        """
+        target: test upsert with auto id
+        method: 1. create a collection with autoID=true
+                2. upsert 10 entities with non-existing pks
+                verify: success, and the pks are auto-generated
+                3. query 10 entities to get the existing pks
+                4. upsert 10 entities with existing pks
+                verify: success, and the pks are re-generated, and the new pks are visible
+        """
+        dim = 32
+        collection_w, _, _, insert_ids, _ = self.init_collection_general(pre_upsert, auto_id=True,
+                                                                         dim=dim, insert_data=True, with_json=False)
+        nb = 10
+        start = ct.default_nb * 10
+        data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
+        res_upsert1 = collection_w.upsert(data=data)[0]
+        collection_w.flush()
+        # assert the pks are auto-generated, and num_entities increased for upsert with non_existing pks
+        assert res_upsert1.primary_keys[0] > insert_ids[-1]
+        assert collection_w.num_entities == ct.default_nb + nb

+        # query 10 entities to get the existing pks
+        res_q = collection_w.query(expr='', limit=nb)[0]
+        print(f"res_q: {res_q}")
+        existing_pks = [res_q[i][ct.default_int64_field_name] for i in range(nb)]
+        existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}",
+                                            output_fields=[ct.default_count_output])[0]
+        assert nb == existing_count[0].get(ct.default_count_output)
+        # upsert 10 entities with the existing pks
+        start = ct.default_nb * 20
+        data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
+        data[0] = existing_pks
+        res_upsert2 = collection_w.upsert(data=data)[0]
+        collection_w.flush()
+        # assert the new pks are auto-generated again
+        assert res_upsert2.primary_keys[0] > res_upsert1.primary_keys[-1]
+        existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}",
+                                            output_fields=[ct.default_count_output])[0]
+        assert 0 == existing_count[0].get(ct.default_count_output)
+        res_q = collection_w.query(expr=f"{ct.default_int64_field_name} in {res_upsert2.primary_keys}",
+                                   output_fields=["*"])[0]
+        assert nb == len(res_q)
+        current_count = collection_w.query(expr='', output_fields=[ct.default_count_output])[0]
+        assert current_count[0].get(ct.default_count_output) == ct.default_nb + nb

+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("auto_id", [True, False])
+    def test_upsert_with_primary_key_string(self, auto_id):
         """
         target: test upsert with string primary key
         method: 1. create a collection with pk string
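The new test counts rows with a count(*) query: in Milvus 2.4 a query whose output_fields contains the literal "count(*)" returns a single row holding the number of matches (ct.default_count_output is assumed to be that literal). Roughly:

# Sketch: counting matched rows via a count(*) query (names assumed):
res, _ = collection_w.query(expr=f"int64 in {existing_pks}", output_fields=["count(*)"])
assert res[0]["count(*)"] == len(existing_pks)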
@@ -1558,11 +1596,18 @@ class TestUpsertValid(TestcaseBase):
         """
         c_name = cf.gen_unique_str(pre_upsert)
         fields = [cf.gen_string_field(), cf.gen_float_vec_field(dim=ct.default_dim)]
-        schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name)
+        schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name,
+                                          auto_id=auto_id)
         collection_w = self.init_collection_wrap(name=c_name, schema=schema)
         vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(2)]
-        collection_w.insert([["a", "b"], vectors])
-        collection_w.upsert([[" a", "b "], vectors])
+        if not auto_id:
+            collection_w.insert([["a", "b"], vectors])
+            res_upsert = collection_w.upsert([[" a", "b "], vectors])[0]
+            assert res_upsert.primary_keys[0] == " a" and res_upsert.primary_keys[1] == "b "
+        else:
+            collection_w.insert([vectors])
+            res_upsert = collection_w.upsert([[" a", "b "], vectors])[0]
+            assert res_upsert.primary_keys[0] != " a" and res_upsert.primary_keys[1] != "b "
         assert collection_w.num_entities == 4

     @pytest.mark.tags(CaseLabel.L2)
@@ -2046,7 +2091,7 @@ class TestUpsertInvalid(TestcaseBase):
                             check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip("insert and upsert have removed the [] error check")
+    @pytest.mark.xfail("insert and upsert have removed the [] error check")
     def test_upsert_multi_partitions(self):
         """
         target: test upsert two partitions
@@ -2065,21 +2110,20 @@ class TestUpsertInvalid(TestcaseBase):
         collection_w.upsert(data=data, partition_name=["partition_1", "partition_2"],
                             check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="smellthemoon: behavior changed")
-    def test_upsert_with_auto_id(self):
+    def test_upsert_with_auto_id_pk_type_dismacth(self):
         """
-        target: test upsert with auto id
-        method: 1. create a collection with autoID=true
-                2. upsert data no pk
+        target: test upsert with auto_id and pk type dismatch
+        method: 1. create a collection with pk int64 and auto_id=True
+                2. upsert with pk string type dismatch
         expected: raise exception
         """
-        collection_w = self.init_collection_general(pre_upsert, auto_id=True, is_index=False)[0]
-        error = {ct.err_code: 999,
-                 ct.err_msg: "Upsert don't support autoid == true"}
-        float_vec_values = cf.gen_vectors(ct.default_nb, ct.default_dim)
-        data = [[np.float32(i) for i in range(ct.default_nb)], [str(i) for i in range(ct.default_nb)],
-                float_vec_values]
+        dim = 16
+        collection_w = self.init_collection_general(pre_upsert, auto_id=False,
+                                                    dim=dim, insert_data=True, with_json=False)[0]
+        nb = 10
+        data = cf.gen_default_list_data(dim=dim, nb=nb, with_json=False)
+        data[0] = [str(i) for i in range(nb)]
+        error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
         collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L2)
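Note that the old negative test asserting "Upsert don't support autoid == true" is gone: upsert on auto_id collections is now legal, so the renamed test instead feeds string primary keys into an INT64 pk schema and expects a type-validation error. A sketch of the failure it provokes:

# Sketch: string pks against an INT64 pk schema fail type validation on upsert:
data = cf.gen_default_list_data(dim=16, nb=10, with_json=False)
data[0] = [str(i) for i in range(10)]  # wrong dtype for the int64 pk column
collection_w.upsert(data=data,
                    check_task=CheckTasks.err_res,
                    check_items={ct.err_code: 999,
                                 ct.err_msg: "The Input data type is inconsistent with defined schema"})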
@@ -27,7 +27,7 @@ class TestIssues(TestcaseBase):
         schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                           auto_id=False, partition_key_field=par_key_field)
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, num_partitions=9)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)

         # insert
         nb = 500
@@ -61,7 +61,7 @@ class TestIssues(TestcaseBase):
         seeds = 200
         rand_ids = random.sample(range(0, num_entities), seeds)
         rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))]
-        res = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field])
+        res, _ = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field])
         # verify every the random id exists
         assert len(res) == len(rand_ids)

@@ -69,8 +69,8 @@ class TestIssues(TestcaseBase):
         for i in range(len(res)):
             pk = res[i].get("pk")
             parkey_value = res[i].get(par_key_field)
-            res_parkey = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'",
-                                            output_fields=["pk", par_key_field])
+            res_parkey, _ = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'",
+                                               output_fields=["pk", par_key_field])
             if len(res_parkey) != 1:
                 log.info(f"dirty data found: pk {pk} with parkey {parkey_value}")
                 dirty_count += 1
@@ -24,7 +24,7 @@ class TestPartitionKeyParams(TestcaseBase):
         vector_field = cf.gen_float_vec_field()
         schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True)
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
         assert len(collection_w.partitions) == ct.default_partition_num

         # insert
@@ -53,23 +53,24 @@ class TestPartitionKeyParams(TestcaseBase):
                                    expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                    output_fields=[int64_field.name, string_field.name],
                                    check_task=CheckTasks.check_search_results,
-                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
+                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]
         # search with partition key filter only or with non partition key
         res2 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                    param=ct.default_search_params, limit=entities_per_parkey,
                                    expr=f'{int64_field.name} in [1,3,5]',
                                    output_fields=[int64_field.name, string_field.name],
                                    check_task=CheckTasks.check_search_results,
-                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
+                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]
         # search with partition key filter only or with non partition key
         res3 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                    param=ct.default_search_params, limit=entities_per_parkey,
                                    expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                    output_fields=[int64_field.name, string_field.name],
                                    check_task=CheckTasks.check_search_results,
-                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
+                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]
         # assert the results persist
-        assert res1.ids == res2.ids == res3.ids
+        for i in range(nq):
+            assert res1[i].ids == res2[i].ids == res3[i].ids

     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
@@ -89,14 +90,14 @@ class TestPartitionKeyParams(TestcaseBase):
         schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                           auto_id=False, partition_key_field=par_key_field)
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema, num_partitions=9)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)

         # insert
         nb = 1000
         string_prefix = cf.gen_str_by_length(length=6)
         entities_per_parkey = 20
         for n in range(entities_per_parkey):
-            pk_values = [str(i) for i in range(n * nb, (n+1)*nb)]
+            pk_values = [str(i) for i in range(n * nb, (n + 1) * nb)]
             int64_values = [i for i in range(0, nb)]
             string_values = [string_prefix + str(i) for i in range(0, nb)]
             float_vec_values = gen_vectors(nb, ct.default_dim)
@@ -120,7 +121,7 @@ class TestPartitionKeyParams(TestcaseBase):
                                    expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                    output_fields=[int64_field.name, string_field.name],
                                    check_task=CheckTasks.check_search_results,
-                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
+                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]

     @pytest.mark.tags(CaseLabel.L1)
     def test_partition_key_off_in_field_but_enable_in_schema(self):
@@ -139,8 +140,7 @@ class TestPartitionKeyParams(TestcaseBase):

         err_msg = "fail to create collection"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=10)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=10)
         assert len(collection_w.partitions) == 10

     @pytest.mark.skip("need more investigation")
@@ -152,44 +152,7 @@ class TestPartitionKeyParams(TestcaseBase):
                 2. bulk insert data
                 3. verify the data bulk inserted and be searched successfully
         """
-        self._connect()
-        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
-        int64_field = cf.gen_int64_field()
-        string_field = cf.gen_string_field(is_partition_key=True)
-        vector_field = cf.gen_float_vec_field()
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          auto_id=True)
-        c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=10)
-        # bulk insert
-        nb = 1000
-        string_prefix = cf.gen_str_by_length(length=6)
-        entities_per_parkey = 20
-        for n in range(entities_per_parkey):
-            pk_values = [str(i) for i in range(n * nb, (n+1)*nb)]
-            int64_values = [i for i in range(0, nb)]
-            string_values = [string_prefix + str(i) for i in range(0, nb)]
-            float_vec_values = gen_vectors(nb, ct.default_dim)
-            data = [pk_values, int64_values, string_values, float_vec_values]
-            collection_w.insert(data)
-
-        # flush
-        collection_w.flush()
-        # build index
-        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
-        # load
-        collection_w.load()
-        # search
-        nq = 10
-        search_vectors = gen_vectors(nq, ct.default_dim)
-        # search with mixed filtered
-        res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
-                                   param=ct.default_search_params, limit=entities_per_parkey,
-                                   expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
-                                   output_fields=[int64_field.name, string_field.name],
-                                   check_task=CheckTasks.check_search_results,
-                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
+        pass


 class TestPartitionKeyInvalidParams(TestcaseBase):
@@ -212,8 +175,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                           auto_id=True)
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=max_partition)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=max_partition)
         assert len(collection_w.partitions) == max_partition

         # insert
@@ -233,10 +195,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         num_partitions = max_partition + 1
         err_msg = f"partition number ({num_partitions}) exceeds max configuration ({max_partition})"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=num_partitions,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 1100, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 1100, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L1)
     def test_min_partitions(self):
@@ -257,8 +218,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                           partition_key_field=int64_field.name)
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=min_partition)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition)
         assert len(collection_w.partitions) == min_partition

         # insert
@@ -279,14 +239,12 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         # create a collection with min partitions - 1
         err_msg = "The specified num_partitions should be greater than or equal to 1"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=min_partition - 1,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 2, "err_msg": err_msg})
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=min_partition - 3,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 1,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 3,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("is_par_key", [None, "", "invalid", 0.1, [], {}, ()])
@@ -298,9 +256,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         """
         self._connect()
         err_msg = "Param is_partition_key must be bool type"
-        int64_field = cf.gen_int64_field(is_partition_key=is_par_key,
-                                         check_task=CheckTasks.err_res,
-                                         check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_int64_field(is_partition_key=is_par_key,
+                           check_task=CheckTasks.err_res,
+                           check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("num_partitions", [True, False, "", "invalid", 0.1, [], {}, ()])
@@ -319,10 +277,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):

         err_msg = "invalid num_partitions type"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=num_partitions,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L0)
     def test_partition_key_on_multi_fields(self):
@@ -338,30 +295,30 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         string_field = cf.gen_string_field(is_partition_key=True)
         vector_field = cf.gen_float_vec_field()
         err_msg = "Expected only one partition key field"
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

         # both defined in collection schema
         err_msg = "Param partition_key_field must be str type"
         int64_field = cf.gen_int64_field()
         string_field = cf.gen_string_field()
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          partition_key_field=[int64_field.name, string_field.name],
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
+                                 partition_key_field=[int64_field.name, string_field.name],
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

         # one defined in field schema, one defined in collection schema
         err_msg = "Expected only one partition key field"
         int64_field = cf.gen_int64_field(is_partition_key=True)
         string_field = cf.gen_string_field()
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          partition_key_field=string_field.name,
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
+                                 partition_key_field=string_field.name,
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("is_int64_primary", [True, False])
@@ -384,9 +341,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):

         err_msg = "the partition key field must not be primary field"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 2, "err_msg": err_msg})

         # if settings on collection schema
         if is_int64_primary:
@@ -399,9 +356,9 @@ class TestPartitionKeyInvalidParams(TestcaseBase):

         err_msg = "the partition key field must not be primary field"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L0)
     def test_partition_key_on_and_off(self):
@@ -416,21 +373,21 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         string_field = cf.gen_string_field()
         vector_field = cf.gen_float_vec_field()
         err_msg = "Expected only one partition key field"
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          partition_key_field=vector_field.name,
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
+                                 partition_key_field=vector_field.name,
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

         # if two fields with same type
         string_field = cf.gen_string_field(name="string1", is_partition_key=True)
         string_field2 = cf.gen_string_field(name="string2")
         err_msg = "Expected only one partition key field"
-        schema = cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field],
-                                          partition_key_field=string_field2.name,
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field],
+                                 partition_key_field=string_field2.name,
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("field_type", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR, DataType.FLOAT,
@@ -458,12 +415,12 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         vector_field = cf.gen_binary_vec_field(is_partition_key=(field_type == DataType.BINARY_VECTOR))

         err_msg = "Partition key field type must be DataType.INT64 or DataType.VARCHAR"
-        schema = cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field,
-                                                  bool_field, float_field, double_field, json_field,
-                                                  int64_field, string_field, vector_field],
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field,
+                                         bool_field, float_field, double_field, json_field,
+                                         int64_field, string_field, vector_field],
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L1)
     def test_partition_key_on_not_existed_fields(self):
@@ -478,11 +435,11 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         string_field = cf.gen_string_field()
         vector_field = cf.gen_float_vec_field()
         err_msg = "the specified partition key field {non_existing_field} not exist"
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          partition_key_field="non_existing_field",
-                                          auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
+                                 partition_key_field="non_existing_field",
+                                 auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L1)
     def test_partition_key_on_empty_and_num_partitions_set(self):
@@ -497,18 +454,17 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         string_field = cf.gen_string_field()
         vector_field = cf.gen_float_vec_field()
         err_msg = "the specified partition key field {} not exist"
-        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
-                                          partition_key_field="", auto_id=True,
-                                          check_task=CheckTasks.err_res,
-                                          check_items={"err_code": 2, "err_msg": err_msg})
+        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
+                                 partition_key_field="", auto_id=True,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

         schema = cf.gen_default_collection_schema()
         err_msg = "num_partitions should only be specified with partition key field enabled"
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema,
-                                                               num_partitions=200,
-                                                               check_task=CheckTasks.err_res,
-                                                               check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=200,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 2, "err_msg": err_msg})

     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("invalid_data", [99, True, None, [], {}, ()])
@@ -528,7 +484,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
                                           partition_key_field=string_field.name, auto_id=False)

         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema)

         # insert
         nb = 10
@@ -541,7 +497,7 @@ class TestPartitionKeyInvalidParams(TestcaseBase):
         data = [pk_values, int64_values, string_values, float_vec_values]

         err_msg = "expect string input"
-        self.collection_wrap.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
+        collection_w.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})


 class TestPartitionApiForbidden(TestcaseBase):
@@ -564,23 +520,23 @@ class TestPartitionApiForbidden(TestcaseBase):
         vector_field = cf.gen_float_vec_field()
         schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True)
         c_name = cf.gen_unique_str("par_key")
-        collection_w, _ = self.collection_wrap.init_collection(name=c_name, schema=schema)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema)

         # create partition
         err_msg = "disable create partition if partition key mode is used"
         partition_name = cf.gen_unique_str("partition")
-        self.collection_wrap.create_partition(partition_name,
-                                              check_task=CheckTasks.err_res,
-                                              check_items={"err_code": 2, "err_msg": err_msg})
-        self.partition_wrap.init_partition(collection_w, partition_name,
-                                           check_task=CheckTasks.err_res,
-                                           check_items={"err_code": 2, "err_msg": err_msg})
+        collection_w.create_partition(partition_name,
+                                      check_task=CheckTasks.err_res,
+                                      check_items={"err_code": 2, "err_msg": err_msg})
+        self.init_partition_wrap(collection_w, partition_name,
+                                 check_task=CheckTasks.err_res,
+                                 check_items={"err_code": 2, "err_msg": err_msg})

         # get partition is allowed
-        partitions = self.collection_wrap.partitions
+        partitions = collection_w.partitions
         collection_w.partition(partitions[0].name)
-        self.partition_wrap.init_partition(collection_w, partitions[0].name)
-        assert self.partition_wrap.name == partitions[0].name
+        partition_w = self.init_partition_wrap(collection_w, partitions[0].name)
+        assert partition_w.name == partitions[0].name
         # has partition is allowed
         assert collection_w.has_partition(partitions[0].name)
         assert self.utility_wrap.has_partition(collection_w.name, partitions[0].name)
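These assertions pin down partition-key mode: once a collection has a partition key field, partition management and partition-targeted DML become server-side errors, while read-only partition lookups stay legal. A sketch with the plain ORM (collection name assumed):

# Sketch: partition-key collections reject manual partition management (pymilvus ORM):
from pymilvus import Collection
coll = Collection("par_key_demo")  # hypothetical partition-key collection
try:
    coll.create_partition("p1")
except Exception as e:
    print(e)  # expected: "disable create partition if partition key mode is used"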
@@ -594,21 +550,21 @@ class TestPartitionApiForbidden(TestcaseBase):
         string_values = [string_prefix + str(i) for i in range(0, nb)]
         float_vec_values = gen_vectors(nb, ct.default_dim)
         data = [int64_values, string_values, float_vec_values]
-        self.collection_wrap.insert(data)
+        collection_w.insert(data)

         err_msg = "not support manually specifying the partition names if partition key mode is used"
-        self.partition_wrap.insert(data, check_task=CheckTasks.err_res,
-                                   check_items={"err_code": 2, "err_msg": err_msg})
-        self.collection_wrap.insert(data, partition_name=partitions[0].name,
-                                    check_task=CheckTasks.err_res,
-                                    check_items={"err_code": 2, "err_msg": err_msg})
+        partition_w.insert(data, check_task=CheckTasks.err_res,
+                           check_items={"err_code": 2, "err_msg": err_msg})
+        collection_w.insert(data, partition_name=partitions[0].name,
+                            check_task=CheckTasks.err_res,
+                            check_items={"err_code": 2, "err_msg": err_msg})

         err_msg = "disable load partitions if partition key mode is used"
-        self.partition_wrap.load(check_task=CheckTasks.err_res,
-                                 check_items={"err_code": 2, "err_msg": err_msg})
-        self.collection_wrap.load(partition_names=[partitions[0].name],
-                                  check_task=CheckTasks.err_res,
-                                  check_items={"err_code": 2, "err_msg": err_msg})
+        partition_w.load(check_task=CheckTasks.err_res,
+                         check_items={"err_code": 2, "err_msg": err_msg})
+        collection_w.load(partition_names=[partitions[0].name],
+                          check_task=CheckTasks.err_res,
+                          check_items={"err_code": 2, "err_msg": err_msg})

         # flush
         collection_w.flush()
@@ -621,26 +577,26 @@ class TestPartitionApiForbidden(TestcaseBase):
         nq = 10
         search_vectors = gen_vectors(nq, ct.default_dim)
         # search with mixed filtered
-        res1 = self.collection_wrap.search(data=search_vectors, anns_field=vector_field.name,
-                                           param=ct.default_search_params, limit=entities_per_parkey,
-                                           expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
-                                           output_fields=[int64_field.name, string_field.name],
-                                           check_task=CheckTasks.check_search_results,
-                                           check_items={"nq": nq, "limit": ct.default_limit})[0]
+        res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
+                                   param=ct.default_search_params, limit=entities_per_parkey,
+                                   expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
+                                   output_fields=[int64_field.name, string_field.name],
+                                   check_task=CheckTasks.check_search_results,
+                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
         pks = res1[0].ids[:3]
         err_msg = "not support manually specifying the partition names if partition key mode is used"
-        self.collection_wrap.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name],
-                                    param=ct.default_search_params, limit=entities_per_parkey,
-                                    expr=f'{int64_field.name} in [1,3,5]',
-                                    output_fields=[int64_field.name, string_field.name],
-                                    check_task=CheckTasks.err_res,
-                                    check_items={"err_code": nq, "err_msg": err_msg})
-        self.partition_wrap.search(data=search_vectors, anns_field=vector_field.name,
-                                   params=ct.default_search_params, limit=entities_per_parkey,
-                                   expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
-                                   output_fields=[int64_field.name, string_field.name],
-                                   check_task=CheckTasks.err_res,
-                                   check_items={"err_code": nq, "err_msg": err_msg})
+        collection_w.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name],
+                            param=ct.default_search_params, limit=entities_per_parkey,
+                            expr=f'{int64_field.name} in [1,3,5]',
+                            output_fields=[int64_field.name, string_field.name],
+                            check_task=CheckTasks.err_res,
+                            check_items={"err_code": nq, "err_msg": err_msg})
+        partition_w.search(data=search_vectors, anns_field=vector_field.name,
+                           params=ct.default_search_params, limit=entities_per_parkey,
+                           expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
+                           output_fields=[int64_field.name, string_field.name],
+                           check_task=CheckTasks.err_res,
+                           check_items={"err_code": nq, "err_msg": err_msg})

         # partition loading progress is allowed
         self.utility_wrap.loading_progress(collection_name=collection_w.name)
@@ -652,18 +608,22 @@ class TestPartitionApiForbidden(TestcaseBase):
         self.utility_wrap.wait_for_loading_complete(collection_name=collection_w.name,
                                                     partition_names=[partitions[0].name])
         # partition flush is allowed: #24165
-        self.partition_wrap.flush()
+        partition_w.flush()

         # partition delete is not allowed
-        self.partition_wrap.delete(expr=f'{pk_field.name} in {pks}',
-                                   check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
-        self.collection_wrap.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name,
-                                    check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
+        partition_w.delete(expr=f'{pk_field.name} in {pks}',
+                           check_task=CheckTasks.err_res,
+                           check_items={"err_code": 2, "err_msg": err_msg})
+        collection_w.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name,
+                            check_task=CheckTasks.err_res,
+                            check_items={"err_code": 2, "err_msg": err_msg})
         # partition query is not allowed
-        self.partition_wrap.query(expr=f'{pk_field.name} in {pks}',
-                                  check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
-        self.collection_wrap.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name],
-                                   check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
+        partition_w.query(expr=f'{pk_field.name} in {pks}',
+                          check_task=CheckTasks.err_res,
+                          check_items={"err_code": 2, "err_msg": err_msg})
+        collection_w.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name],
+                           check_task=CheckTasks.err_res,
+                           check_items={"err_code": 2, "err_msg": err_msg})
         # partition upsert is not allowed
         # self.partition_wrap.upsert(data=data, check_task=CheckTasks.err_res,
         #                            check_items={"err_code": 2, "err_msg": err_msg})
@@ -671,10 +631,10 @@ class TestPartitionApiForbidden(TestcaseBase):
         #                            chek_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
         # partition release
         err_msg = "disable release partitions if partition key mode is used"
-        self.partition_wrap.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
+        partition_w.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
         # partition drop
         err_msg = "disable drop partition if partition key mode is used"
-        self.partition_wrap.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})
+        partition_w.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})

         # # partition bulk insert
         # self.utility_wrap.do_bulk_insert(collection_w.name, files, partition_names=[partitions[0].name],
@@ -3838,7 +3838,7 @@ class TestQueryCount(TestcaseBase):
         self._connect()
         c_name = cf.gen_unique_str(prefix)
         schema = cf.gen_default_sparse_schema()
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=schema)
+        collection_w = self.init_collection_wrap(c_name, schema=schema)
         data = cf.gen_default_list_sparse_data()
         collection_w.insert(data)
         params = cf.get_index_params_params(index)
@@ -4676,9 +4676,9 @@ class TestCollectionSearch(TestcaseBase):
         self._connect()
         c_name = cf.gen_unique_str(prefix)
         binary_schema = cf.gen_default_binary_collection_schema(dim=dim)
-        self.collection_wrap.init_collection(c_name, schema=binary_schema,
-                                             check_task=CheckTasks.err_res,
-                                             check_items={"err_code": 65535, "err_msg": f"invalid dimension {dim}."})
+        self.init_collection_wrap(c_name, schema=binary_schema,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={"err_code": 999, "err_msg": f"invalid dimension: {dim}."})


 class TestSearchBase(TestcaseBase):
@@ -5173,8 +5173,9 @@ class TestSearchBase(TestcaseBase):
         expected: search success
         """
         self._connect()
-        c_name = cf.gen_unique_str(prefix)
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema())
+        nb = 2000
+        dim = 32
+        collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0]
         params = cf.get_index_params_params(index)
         default_index = {"index_type": index, "params": params, "metric_type": "L2"}
         collection_w.create_index(field_name, default_index, index_name="mmap_index")
@@ -5183,13 +5184,18 @@ class TestSearchBase(TestcaseBase):
         # search
         collection_w.load()
         search_params = cf.gen_search_param(index)[0]
-        vector = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
-        collection_w.search(vector, default_search_field, search_params, ct.default_limit)
+        vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
+        collection_w.search(vector, default_search_field, search_params, ct.default_limit,
+                            output_fields=["*"],
+                            check_task=CheckTasks.check_search_results,
+                            check_items={"nq": default_nq,
+                                         "limit": ct.default_limit})
+        # enable mmap
         collection_w.release()
         collection_w.alter_index("mmap_index", {'mmap.enabled': False})
         collection_w.load()
         collection_w.search(vector, default_search_field, search_params, ct.default_limit,
                             output_fields=["*"],
                             check_task=CheckTasks.check_search_results,
                             check_items={"nq": default_nq,
                                          "limit": ct.default_limit})
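A detail worth noting in the mmap tests above: the index's mmap setting is changed only between load states, so the tests follow a release, alter_index, load sequence before searching again, e.g.:

# Toggling mmap on an index between searches (pattern used by the tests above):
collection_w.release()                                           # unload before changing index params
collection_w.alter_index("mmap_index", {'mmap.enabled': False})
collection_w.load()                                              # reload picks up the new setting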
@@ -5204,29 +5210,27 @@ class TestSearchBase(TestcaseBase):
         """
         self._connect()
         dim = 64
-        c_name = cf.gen_unique_str(prefix)
-        default_schema = cf.gen_default_binary_collection_schema(auto_id=False, dim=dim,
-                                                                 primary_field=ct.default_int64_field_name)
-        collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema)
+        nb = 2000
+        collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0]
         params = cf.get_index_params_params(index)
         default_index = {"index_type": index,
                          "params": params, "metric_type": "JACCARD"}
-        collection_w.create_index("binary_vector", default_index, index_name="binary_idx_name")
+        collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name")
         collection_w.alter_index("binary_idx_name", {'mmap.enabled': True})
         collection_w.set_properties({'mmap.enabled': True})
         collection_w.load()
-        pro = collection_w.describe().get("properties")
+        pro = collection_w.describe()[0].get("properties")
         assert pro["mmap.enabled"] == 'True'
-        assert collection_w.index().params["mmap.enabled"] == 'True'
+        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
         # search
-        binary_vectors = cf.gen_binary_vectors(3000, dim)[1]
+        binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1]
         search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
-        output_fields = [default_string_field_name]
-        collection_w.search(binary_vectors[:default_nq], "binary_vector", search_params,
+        output_fields = ["*"]
+        collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params,
                             default_limit, default_search_string_exp, output_fields=output_fields,
                             check_task=CheckTasks.check_search_results,
-                            check_items={"nq": nq,
-                                         "limit": ct.default_top_k})
+                            check_items={"nq": default_nq,
+                                         "limit": default_limit})


 class TestSearchDSL(TestcaseBase):