Add test for numpy files in diff folders (#17175)
Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
parent ea8e1623cd
commit 8dc43fa811
@@ -847,11 +847,10 @@ class TestBulkLoad(TestcaseBase):
                                          "limit": 1})
 
     @pytest.mark.tags(CaseLabel.L3)
-    @pytest.mark.parametrize("auto_id", [True])
+    @pytest.mark.parametrize("auto_id", [True, False])
     @pytest.mark.parametrize("dim", [6])
-    @pytest.mark.parametrize("entities", [10])
+    @pytest.mark.parametrize("entities", [1000])
-    @pytest.mark.parametrize("file_nums", [2])  # 32, max task nums 32? need improve
+    @pytest.mark.parametrize("file_nums", [10])
-    @pytest.mark.xfail(reason="only one numpy file imported successfully, issue #16992")
     def test_multi_numpy_files_from_diff_folders(self, auto_id, dim, entities, file_nums):
         """
         collection schema 1: [pk, float_vector]
@@ -859,18 +858,10 @@ class TestBulkLoad(TestcaseBase):
         Steps:
         1. create collection
         2. import data
-        3. if row_based: verify import failed
-        4. if column_based:
-          4.1 verify the data entities equal the import data
-          4.2 verify search and query successfully
+        3. verify that import numpy files in a loop
         """
         row_based = False  # numpy files supports only column based
-        data_fields = [df.vec_field]
-        if not auto_id:
-            data_fields.append(df.pk_field)
-        files = prepare_bulk_load_numpy_files(rows=entities, dim=dim,
-                                              data_fields=data_fields,
-                                              file_nums=file_nums, force=True)
         self._connect()
         c_name = cf.gen_unique_str()
         fields = [cf.gen_int64_field(name=df.pk_field, is_primary=True),
@@ -882,7 +873,14 @@ class TestBulkLoad(TestcaseBase):
         self.collection_wrap.create_index(field_name=df.vec_field, index_params=index_params)
         # load collection
         self.collection_wrap.load()
-        t0 = time.time()
+        data_fields = [df.vec_field]
+        if not auto_id:
+            data_fields.append(df.pk_field)
+        for i in range(file_nums):
+            files = prepare_bulk_load_numpy_files(rows=entities, dim=dim,
+                                                  data_fields=data_fields,
+                                                  file_nums=1, force=True)
             task_ids, _ = self.utility_wrap.bulk_load(collection_name=c_name,
                                                       row_based=row_based,
                                                       files=files)
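The loop above generates a fresh single-file batch on every iteration, so each bulk_load request references exactly one .npy per field, taken from a different folder. As a rough illustration of what "numpy files in different folders" plausibly looks like on disk, here is a hypothetical generator; the folder layout, file naming, and the make_numpy_files helper are illustrative assumptions, not the actual prepare_bulk_load_numpy_files implementation:

```python
import os
import numpy as np

# Hypothetical sketch: one <field>.npy per sub-folder. The files share a
# basename (the field name) but live at distinct paths, which is what
# lets them be imported one request at a time.
def make_numpy_files(root, rows, dim, file_nums):
    files = []
    for i in range(file_nums):
        folder = os.path.join(root, f"folder_{i}")
        os.makedirs(folder, exist_ok=True)
        path = os.path.join(folder, "float_vector.npy")  # assumed field name
        np.save(path, np.random.random((rows, dim)).astype(np.float32))
        files.append(path)
    return files
```

Because the files all carry the same basename, passing several of them in a single request would present duplicate column data for one field; that is the scenario the new negative test below exercises.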
@@ -890,8 +888,7 @@ class TestBulkLoad(TestcaseBase):
                 wait_for_bulk_load_tasks_completed(task_ids=task_ids,
                                                    target_state=BulkLoadStates.BulkLoadPersisted,
                                                    timeout=30)
-        tt = time.time() - t0
-        log.info(f"bulk load state:{success} in {tt}")
+            log.info(f"bulk load state:{success}")
 
         assert success
         log.info(f" collection entities: {self.collection_wrap.num_entities}")
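The assertion path relies on the framework helper wait_for_bulk_load_tasks_completed returning a (success, states) pair. A minimal sketch of the polling behavior such a helper implies is below; get_state stands in for whatever per-task state query the framework wraps, and the state-name strings come from the assertions in this diff, so treat everything else as assumption rather than the helper's real code:

```python
import time

# Poll each task until it reaches the target state, fails, or the
# timeout expires; mirrors the (success, states) contract used above.
def wait_for_tasks(task_ids, get_state, target_state="BulkLoadPersisted", timeout=30):
    deadline = time.time() + timeout
    pending = set(task_ids)
    states = {}
    while pending and time.time() < deadline:
        for task_id in list(pending):
            state = get_state(task_id)  # placeholder state query
            states[task_id] = state
            if state.state_name in (target_state, "BulkLoadFailed"):
                pending.discard(task_id)
        if pending:
            time.sleep(1)
    success = not pending and all(
        s.state_name == target_state for s in states.values())
    return success, states
```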
@@ -1734,6 +1731,51 @@ class TestBulkLoadInvalidParams(TestcaseBase):
         # res, _ = self.collection_wrap.query(expr=f"{float_field} in [1.0]", output_fields=[float_field])
         # assert res[0].get(float_field, 0) == 1.0
 
+    @pytest.mark.tags(CaseLabel.L3)
+    @pytest.mark.parametrize("auto_id", [True, False])
+    @pytest.mark.parametrize("dim", [6])
+    @pytest.mark.parametrize("entities", [10])
+    @pytest.mark.parametrize("file_nums", [2])
+    def test_multi_numpy_files_from_diff_folders_in_one_request(self, auto_id, dim, entities, file_nums):
+        """
+        collection schema 1: [pk, float_vector]
+        data file: .npy files in different folders
+        Steps:
+        1. create collection
+        2. import data
+        3. fail to import data with errors
+        """
+        row_based = False  # numpy files supports only column based
+        data_fields = [df.vec_field]
+        if not auto_id:
+            data_fields.append(df.pk_field)
+        files = prepare_bulk_load_numpy_files(rows=entities, dim=dim,
+                                              data_fields=data_fields,
+                                              file_nums=file_nums, force=True)
+        self._connect()
+        c_name = cf.gen_unique_str()
+        fields = [cf.gen_int64_field(name=df.pk_field, is_primary=True),
+                  cf.gen_float_vec_field(name=df.vec_field, dim=dim)]
+        schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+
+        t0 = time.time()
+        task_ids, _ = self.utility_wrap.bulk_load(collection_name=c_name,
+                                                  row_based=row_based,
+                                                  files=files)
+        success, states = self.utility_wrap. \
+            wait_for_bulk_load_tasks_completed(task_ids=task_ids,
+                                               target_state=BulkLoadStates.BulkLoadPersisted,
+                                               timeout=30)
+        tt = time.time() - t0
+        log.info(f"bulk load state:{success} in {tt}")
+
+        assert not success
+        failed_reason = "duplicate file"
+        for state in states.values():
+            assert state.state_name == "BulkLoadFailed"
+            assert failed_reason in state.infos.get("failed_reason", "")
+        assert self.collection_wrap.num_entities == 0
 
     # TODO: string data on float field
 
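The failure branch above asserts on state.state_name and state.infos for every task. Distilled into a small helper (the attribute names mirror the test; the helper itself is hypothetical):

```python
# Collect failure reasons keyed by task id from bulk-load task states.
def collect_failed_reasons(states):
    return {
        task_id: state.infos.get("failed_reason", "")
        for task_id, state in states.items()
        if state.state_name == "BulkLoadFailed"
    }
```

With it, the duplicate-file check reduces to asserting that every collected reason contains "duplicate file" and that the collection's num_entities stayed at 0.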
@@ -1811,5 +1853,3 @@ class TestBulkLoadAdvanced(TestcaseBase):
         # self.collection_wrap.query(expr=f"id in {ids}")
 
 """Validate data consistency and availability during import"""
-
-