test: fix duplicate import file names in tests (#40182)

pr: https://github.com/milvus-io/milvus/pull/40181

/kind improvement

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
Author: zhuwenxing <wenxing.zhu@zilliz.com>, 2025-02-26 14:02:05 +08:00 (committed by GitHub)
Commit: 32c00dbc1b (parent: 27fb8d9512)

@@ -652,7 +652,7 @@ def gen_file_name(is_row_based, rows, dim, auto_id, str_pk,
         pk = "str_pk_"
     prefix = gen_file_prefix(is_row_based=is_row_based, auto_id=auto_id, prefix=err_type)
-    file_name = f"{prefix}_{pk}{vt}{field_suffix}{dim}d_{row_suffix}_{file_num}_{int(time.time())}{file_type}"
+    file_name = f"{prefix}_{pk}{vt}{field_suffix}{dim}d_{row_suffix}_{file_num}_{str(uuid.uuid4())}{file_type}"
     return file_name
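
Why the change matters: int(time.time()) has one-second resolution, so two files generated within the same second get identical names and the later write silently replaces the earlier one, while uuid.uuid4() is random per call. A minimal sketch of the difference; the helper names below are illustrative, not part of the Milvus test suite:

import time
import uuid

def name_with_timestamp(prefix):
    # second-resolution timestamp: repeats for every call within the same second
    return f"{prefix}-{int(time.time())}.json"

def name_with_uuid(prefix):
    # uuid4 draws 122 random bits per call, so collisions are practically impossible
    return f"{prefix}-{str(uuid.uuid4())}.json"

print(name_with_timestamp("data") == name_with_timestamp("data"))  # True within one second
print(name_with_uuid("data") == name_with_uuid("data"))            # False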
@@ -815,7 +815,7 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_
         rows = 5000
     start_uid = 0
     for i in range(file_nums):
-        file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{i}-{int(time.time())}.json"
+        file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{i}-{str(uuid.uuid4())}.json"
         file = f"{data_source_new}/{file_name}"
         Path(file).parent.mkdir(parents=True, exist_ok=True)
         data = gen_dict_data_by_data_field(data_fields=data_fields, rows=rows, start=start_uid,
@@ -835,7 +835,7 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_
         all_data = []
         for _ in range(total_batch):
             all_data += data
-        file_name = f"data-fields-{len(data_fields)}-rows-{total_rows}-dim-{dim}-file-num-{i}-{int(time.time())}.json"
+        file_name = f"data-fields-{len(data_fields)}-rows-{total_rows}-dim-{dim}-file-num-{i}-{str(uuid.uuid4())}.json"
         with open(f"{data_source_new}/{file_name}", "w") as f:
             json.dump(all_data, f)
         batch_file_size = os.path.getsize(f"{data_source_new}/{file_name}")
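
For context, this branch writes one batch, measures it on disk, and scales it up to a requested file size in GiB. A self-contained sketch of that measurement, assuming a throwaway directory and toy rows (data_source_new and the row shape here are placeholders, not the suite's real fixtures):

import json
import os
import uuid
from pathlib import Path

data_source_new = "/tmp/import_demo"  # placeholder directory
data = [{"id": i, "vec": [0.1, 0.2]} for i in range(5000)]

file_name = f"batch-{str(uuid.uuid4())}.json"
file = f"{data_source_new}/{file_name}"
Path(file).parent.mkdir(parents=True, exist_ok=True)
with open(file, "w") as f:
    json.dump(data, f)

# Estimate how many copies of this batch reach a 1 GiB target, mirroring
# total_batch = int(file_size*1024*1024*1024/batch_file_size) in the diff.
batch_file_size = os.path.getsize(file)
total_batch = int(1 * 1024 * 1024 * 1024 / batch_file_size)
print(batch_file_size, total_batch)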
@@ -982,7 +982,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_
             all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0)
         df = pd.DataFrame(all_field_data)
         log.info(f"df: \n{df}")
-        file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{file_nums}-error-{err_type}-{int(time.time())}.parquet"
+        file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{file_nums}-error-{err_type}-{str(uuid.uuid4())}.parquet"
         if row_group_size is not None:
             df.to_parquet(f"{data_source_new}/{file_name}", engine='pyarrow', row_group_size=row_group_size)
         else:
@@ -995,7 +995,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_
         total_batch = int(file_size*1024*1024*1024/batch_file_size)
         total_rows = total_batch * rows
         all_df = pd.concat([df for _ in range(total_batch)], axis=0, ignore_index=True)
-        file_name = f"data-fields-{len(data_fields)}-rows-{total_rows}-dim-{dim}-file-num-{file_nums}-error-{err_type}-{int(time.time())}.parquet"
+        file_name = f"data-fields-{len(data_fields)}-rows-{total_rows}-dim-{dim}-file-num-{file_nums}-error-{err_type}-{str(uuid.uuid4())}.parquet"
         log.info(f"all df: \n {all_df}")
         if row_group_size is not None:
             all_df.to_parquet(f"{data_source_new}/{file_name}", engine='pyarrow', row_group_size=row_group_size)
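
The parquet path builds the large frame in memory first: one batch df is repeated total_batch times with pd.concat, and ignore_index=True renumbers the rows so the concatenated frame does not carry duplicate index values. A small sketch with made-up numbers:

import pandas as pd

df = pd.DataFrame({"id": range(3), "value": [0.1, 0.2, 0.3]})  # stand-in for one batch
total_batch = 4  # in the diff this comes from file_size*1024*1024*1024/batch_file_size

all_df = pd.concat([df for _ in range(total_batch)], axis=0, ignore_index=True)
print(len(all_df))       # 12 rows: total_batch * len(df)
print(all_df.index[-1])  # 11: a fresh 0..n-1 index, no duplicates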
@@ -1014,7 +1014,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_
         if enable_dynamic_field:
             all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0)
         df = pd.DataFrame(all_field_data)
-        file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{i}-error-{err_type}-{int(time.time())}.parquet"
+        file_name = f"data-fields-{len(data_fields)}-rows-{rows}-dim-{dim}-file-num-{i}-error-{err_type}-{str(uuid.uuid4())}.parquet"
         if row_group_size is not None:
             df.to_parquet(f"{data_source_new}/{file_name}", engine='pyarrow', row_group_size=row_group_size)
         else:
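
As a reference for the row_group_size branch: pandas forwards extra keyword arguments to the pyarrow engine, so row_group_size caps the number of rows per parquet row group. A sketch verifying the grouping (file name and sizes are illustrative):

import uuid
import pandas as pd
import pyarrow.parquet as pq

df = pd.DataFrame({"id": range(10000)})
file_name = f"demo-{str(uuid.uuid4())}.parquet"
df.to_parquet(file_name, engine="pyarrow", row_group_size=2500)

# 10000 rows capped at 2500 rows per group -> 4 row groups
print(pq.ParquetFile(file_name).metadata.num_row_groups)  # 4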