diff --git a/tests/restful_client_v2/README.md b/tests/restful_client_v2/README.md new file mode 100644 index 0000000000..b629a767fe --- /dev/null +++ b/tests/restful_client_v2/README.md @@ -0,0 +1,9 @@ + +## How to run the test cases + +Install Milvus with authentication enabled, then install the dependencies and run the tests: + +```bash +pip install -r requirements.txt +pytest testcases -m L0 -n 6 -v --endpoint http://127.0.0.1:19530 --minio_host 127.0.0.1 +``` diff --git a/tests/restful_client_v2/api/milvus.py b/tests/restful_client_v2/api/milvus.py new file mode 100644 index 0000000000..ac553149c3 --- /dev/null +++ b/tests/restful_client_v2/api/milvus.py @@ -0,0 +1,750 @@ +import json +import requests +import time +import uuid +from utils.util_log import test_log as logger +from minio import Minio +from minio.error import S3Error + + +def logger_request_response(response, url, tt, headers, data, str_data, str_response, method): + if len(data) > 2000: + data = data[:1000] + "..." + data[-1000:] + try: + if response.status_code == 200: + if ('code' in response.json() and response.json()["code"] == 200) or ( + 'Code' in response.json() and response.json()["Code"] == 0): + logger.debug( + f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {str_data}, \nresponse: {str_response}") + else: + logger.debug( + f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {data}, \nresponse: {response.text}") + else: + logger.debug( + f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {data}, \nresponse: {response.text}") + except Exception as e: + logger.debug( + f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {data}, \nresponse: {response.text}, \nerror: {e}") + + +class Requests: + def __init__(self, url=None, api_key=None): + self.url = url + self.api_key = api_key + self.headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def post(self, url, headers=None, data=None, params=None): + headers = headers if headers is not None else self.update_headers() + data = json.dumps(data) + str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data + t0 = time.time() + response = requests.post(url, headers=headers, data=data, params=params) + tt = time.time() - t0 + str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text + logger_request_response(response, url, tt, headers, data, str_data, str_response, "post") + return response + + def get(self, url, headers=None, params=None, data=None): + headers = headers if headers is not None else self.update_headers() + data = json.dumps(data) + str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data + t0 = time.time() + if data is None or data == "null": + response = requests.get(url, headers=headers, params=params) + else: + response = requests.get(url, headers=headers, params=params, data=data) + tt = time.time() - t0 + str_response = response.text[:200] + '...' 
+ response.text[-200:] if len(response.text) > 400 else response.text + logger_request_response(response, url, tt, headers, data, str_data, str_response, "get") + return response + + def put(self, url, headers=None, data=None): + headers = headers if headers is not None else self.update_headers() + data = json.dumps(data) + str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data + t0 = time.time() + response = requests.put(url, headers=headers, data=data) + tt = time.time() - t0 + str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text + logger_request_response(response, url, tt, headers, data, str_data, str_response, "put") + return response + + def delete(self, url, headers=None, data=None): + headers = headers if headers is not None else self.update_headers() + data = json.dumps(data) + str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data + t0 = time.time() + response = requests.delete(url, headers=headers, data=data) + tt = time.time() - t0 + str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text + logger_request_response(response, url, tt, headers, data, str_data, str_response, "delete") + return response + + +class VectorClient(Requests): + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.token = token + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'Accept-Type-Allow-Int64': "true", + 'RequestId': str(uuid.uuid1()) + } + return headers + + def vector_search(self, payload, db_name="default", timeout=10): + time.sleep(1) + url = f'{self.endpoint}/v2/vectordb/entities/search' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + rsp = response.json() + if "data" in rsp and len(rsp["data"]) == 0: + t0 = time.time() + while time.time() - t0 < timeout: + response = self.post(url, headers=self.update_headers(), data=payload) + rsp = response.json() + if len(rsp["data"]) > 0: + break + time.sleep(1) + else: + response = self.post(url, headers=self.update_headers(), data=payload) + rsp = response.json() + if "data" in rsp and len(rsp["data"]) == 0: + logger.info(f"after {timeout}s, still no data") + + return response.json() + + def vector_query(self, payload, db_name="default", timeout=10): + time.sleep(1) + url = f'{self.endpoint}/v2/vectordb/entities/query' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + rsp = response.json() + if "data" in rsp and len(rsp["data"]) == 0: + t0 = time.time() + while time.time() - t0 < timeout: + response = self.post(url, headers=self.update_headers(), data=payload) + rsp = response.json() + if len(rsp["data"]) > 0: + break + time.sleep(1) + else: + response = self.post(url, headers=self.update_headers(), data=payload) + rsp = response.json() + if "data" in rsp and len(rsp["data"]) == 0: + logger.info(f"after {timeout}s, still no data") + + return response.json() + + def vector_get(self, payload, db_name="default"): + time.sleep(1) + url = f'{self.endpoint}/v2/vectordb/entities/get' + if 
self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + def vector_delete(self, payload, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/entities/delete' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + def vector_insert(self, payload, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/entities/insert' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + def vector_upsert(self, payload, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/entities/upsert' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + +class CollectionClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self, headers=None): + if headers is not None: + return headers + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def collection_has(self, db_name="default", collection_name=None): + url = f'{self.endpoint}/v2/vectordb/collections/has' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def collection_rename(self, payload, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/collections/rename' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + def collection_stats(self, db_name="default", collection_name=None): + url = f'{self.endpoint}/v2/vectordb/collections/get_stats' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def collection_load(self, db_name="default", collection_name=None): + url = f'{self.endpoint}/v2/vectordb/collections/load' + if self.db_name is not None: + db_name = self.db_name + payload = { + "dbName": db_name, + "collectionName": collection_name + } + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def collection_release(self, db_name="default", collection_name=None): + url = f'{self.endpoint}/v2/vectordb/collections/release' + if self.db_name is not None: + db_name = self.db_name + payload = { + "dbName": db_name, + "collectionName": collection_name + } + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def collection_load_state(self, 
db_name="default", collection_name=None, partition_names=None): + url = f'{self.endpoint}/v2/vectordb/collections/get_load_state' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name, + } + if partition_names is not None: + data["partitionNames"] = partition_names + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def collection_list(self, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/collections/list' + params = {} + if self.db_name is not None: + params = { + "dbName": self.db_name + } + if db_name != "default": + params = { + "dbName": db_name + } + response = self.post(url, headers=self.update_headers(), params=params) + res = response.json() + return res + + def collection_create(self, payload, db_name="default"): + time.sleep(1) # wait for collection created and in case of rate limit + url = f'{self.endpoint}/v2/vectordb/collections/create' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + def collection_describe(self, collection_name, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/collections/describe' + data = {"collectionName": collection_name} + if self.db_name is not None: + data = { + "collectionName": collection_name, + "dbName": self.db_name + } + if db_name != "default": + data = { + "collectionName": collection_name, + "dbName": db_name + } + response = self.post(url, headers=self.update_headers(), data=data) + return response.json() + + def collection_drop(self, payload, db_name="default"): + time.sleep(1) # wait for collection drop and in case of rate limit + url = f'{self.endpoint}/v2/vectordb/collections/drop' + if self.db_name is not None: + payload["dbName"] = self.db_name + if db_name != "default": + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + return response.json() + + +class PartitionClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def partition_list(self, db_name="default", collection_name=None): + url = f'{self.endpoint}/v2/vectordb/partitions/list' + data = { + "collectionName": collection_name + } + if self.db_name is not None: + data = { + "dbName": self.db_name, + "collectionName": collection_name + } + if db_name != "default": + data = { + "dbName": db_name, + "collectionName": collection_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def partition_create(self, db_name="default", collection_name=None, partition_name=None): + url = f'{self.endpoint}/v2/vectordb/partitions/create' + if self.db_name is not None: + db_name = self.db_name + payload = { + "dbName": db_name, + "collectionName": collection_name, + "partitionName": partition_name + } + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def partition_drop(self, db_name="default", collection_name=None, partition_name=None): + url = 
f'{self.endpoint}/v2/vectordb/partitions/drop' + if self.db_name is not None: + db_name = self.db_name + payload = { + "dbName": db_name, + "collectionName": collection_name, + "partitionName": partition_name + } + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def partition_load(self, db_name="default", collection_name=None, partition_names=None): + url = f'{self.endpoint}/v2/vectordb/partitions/load' + if self.db_name is not None: + db_name = self.db_name + payload = { + "dbName": db_name, + "collectionName": collection_name, + "partitionNames": partition_names + } + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def partition_release(self, db_name="default", collection_name=None, partition_names=None): + url = f'{self.endpoint}/v2/vectordb/partitions/release' + if self.db_name is not None: + db_name = self.db_name + payload = { + "dbName": db_name, + "collectionName": collection_name, + "partitionNames": partition_names + } + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def partition_has(self, db_name="default", collection_name=None, partition_name=None): + url = f'{self.endpoint}/v2/vectordb/partitions/has' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name, + "partitionName": partition_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def partition_stats(self, db_name="default", collection_name=None, partition_name=None): + url = f'{self.endpoint}/v2/vectordb/partitions/get_stats' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name, + "partitionName": partition_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + +class UserClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def user_list(self): + url = f'{self.endpoint}/v2/vectordb/users/list' + response = self.post(url, headers=self.update_headers()) + res = response.json() + return res + + def user_create(self, payload): + url = f'{self.endpoint}/v2/vectordb/users/create' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def user_password_update(self, payload): + url = f'{self.endpoint}/v2/vectordb/users/update_password' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def user_describe(self, user_name): + url = f'{self.endpoint}/v2/vectordb/users/describe' + data = { + "userName": user_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def user_drop(self, payload): + url = f'{self.endpoint}/v2/vectordb/users/drop' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def user_grant(self, payload): + url = f'{self.endpoint}/v2/vectordb/users/grant_role' + response 
= self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def user_revoke(self, payload): + url = f'{self.endpoint}/v2/vectordb/users/revoke_role' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + +class RoleClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def role_list(self): + url = f'{self.endpoint}/v2/vectordb/roles/list' + response = self.post(url, headers=self.update_headers()) + res = response.json() + return res + + def role_create(self, payload): + url = f'{self.endpoint}/v2/vectordb/roles/create' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def role_describe(self, role_name): + url = f'{self.endpoint}/v2/vectordb/roles/describe' + data = { + "roleName": role_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def role_drop(self, payload): + url = f'{self.endpoint}/v2/vectordb/roles/drop' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def role_grant(self, payload): + url = f'{self.endpoint}/v2/vectordb/roles/grant_privilege' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def role_revoke(self, payload): + url = f'{self.endpoint}/v2/vectordb/roles/revoke_privilege' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + +class IndexClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def index_create(self, payload, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/indexes/create' + if self.db_name is not None: + db_name = self.db_name + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def index_describe(self, db_name="default", collection_name=None, index_name=None): + url = f'{self.endpoint}/v2/vectordb/indexes/describe' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name, + "indexName": index_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def index_list(self, collection_name=None, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/indexes/list' + if self.db_name is not None: + db_name = self.db_name + data = { + "dbName": db_name, + "collectionName": collection_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def index_drop(self, payload, db_name="default"): + url = f'{self.endpoint}/v2/vectordb/indexes/drop' + if self.db_name is not None: + 
db_name = self.db_name + payload["dbName"] = db_name + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + +class AliasClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def list_alias(self): + url = f'{self.endpoint}/v2/vectordb/aliases/list' + response = self.post(url, headers=self.update_headers()) + res = response.json() + return res + + def describe_alias(self, alias_name): + url = f'{self.endpoint}/v2/vectordb/aliases/describe' + data = { + "aliasName": alias_name + } + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def alter_alias(self, payload): + url = f'{self.endpoint}/v2/vectordb/aliases/alter' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def drop_alias(self, payload): + url = f'{self.endpoint}/v2/vectordb/aliases/drop' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def create_alias(self, payload): + url = f'{self.endpoint}/v2/vectordb/aliases/create' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + +class ImportJobClient(Requests): + + def __init__(self, endpoint, token): + super().__init__(url=endpoint, api_key=token) + self.endpoint = endpoint + self.api_key = token + self.db_name = None + self.headers = self.update_headers() + + def update_headers(self): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'RequestId': str(uuid.uuid1()) + } + return headers + + def list_import_jobs(self, payload, db_name="default"): + payload["dbName"] = db_name + data = payload + url = f'{self.endpoint}/v2/vectordb/jobs/import/list' + response = self.post(url, headers=self.update_headers(), data=data) + res = response.json() + return res + + def create_import_jobs(self, payload): + url = f'{self.endpoint}/v2/vectordb/jobs/import/create' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + def get_import_job_progress(self, task_id): + payload = { + "taskID": task_id + } + url = f'{self.endpoint}/v2/vectordb/jobs/import/get_progress' + response = self.post(url, headers=self.update_headers(), data=payload) + res = response.json() + return res + + +class StorageClient(): + + def __init__(self, endpoint, access_key, secret_key, bucket_name): + self.endpoint = endpoint + self.access_key = access_key + self.secret_key = secret_key + self.bucket_name = bucket_name + self.client = Minio( + self.endpoint, + access_key=access_key, + secret_key=secret_key, + secure=False, + ) + + def upload_file(self, file_path, object_name): + try: + self.client.fput_object(self.bucket_name, object_name, file_path) + except S3Error as exc: + logger.error("fail to copy files to minio", exc) diff --git a/tests/restful_client_v2/base/testbase.py b/tests/restful_client_v2/base/testbase.py new file mode 100644 index 0000000000..bb84c95ab2 --- /dev/null +++ b/tests/restful_client_v2/base/testbase.py @@ -0,0 +1,135 @@ +import json +import sys +import pytest 
+import time +from pymilvus import connections, db +from utils.util_log import test_log as logger +from api.milvus import (VectorClient, CollectionClient, PartitionClient, IndexClient, AliasClient, + UserClient, RoleClient, ImportJobClient, StorageClient) +from utils.utils import get_data_by_payload + + +def get_config(): + pass + + +class Base: + name = None + protocol = None + host = None + port = None + endpoint = None + api_key = None + username = None + password = None + invalid_api_key = None + vector_client = None + collection_client = None + partition_client = None + index_client = None + user_client = None + role_client = None + import_job_client = None + storage_client = None + + +class TestBase(Base): + + def teardown_method(self): + self.collection_client.api_key = self.api_key + all_collections = self.collection_client.collection_list()['data'] + if self.name in all_collections: + logger.info(f"collection {self.name} exist, drop it") + payload = { + "collectionName": self.name, + } + try: + rsp = self.collection_client.collection_drop(payload) + except Exception as e: + logger.error(e) + + @pytest.fixture(scope="function", autouse=True) + def init_client(self, endpoint, token, minio_host): + self.endpoint = f"{endpoint}" + self.api_key = f"{token}" + self.invalid_api_key = "invalid_token" + self.vector_client = VectorClient(self.endpoint, self.api_key) + self.collection_client = CollectionClient(self.endpoint, self.api_key) + self.partition_client = PartitionClient(self.endpoint, self.api_key) + self.index_client = IndexClient(self.endpoint, self.api_key) + self.alias_client = AliasClient(self.endpoint, self.api_key) + self.user_client = UserClient(self.endpoint, self.api_key) + self.role_client = RoleClient(self.endpoint, self.api_key) + self.import_job_client = ImportJobClient(self.endpoint, self.api_key) + self.storage_client = StorageClient(f"{minio_host}:9000", "minioadmin", "minioadmin", "milvus-bucket") + if token is None: + self.vector_client.api_key = None + self.collection_client.api_key = None + self.partition_client.api_key = None + connections.connect(uri=endpoint, token=token) + + def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim=128, nb=100, batch_size=1000): + # create collection + schema_payload = { + "collectionName": collection_name, + "dimension": dim, + "metricType": metric_type, + "description": "test collection", + "primaryField": pk_field, + "vectorField": "vector", + } + rsp = self.collection_client.collection_create(schema_payload) + assert rsp['code'] == 200 + self.wait_collection_load_completed(collection_name) + batch_size = batch_size + batch = nb // batch_size + remainder = nb % batch_size + data = [] + for i in range(batch): + nb = batch_size + data = get_data_by_payload(schema_payload, nb) + payload = { + "collectionName": collection_name, + "data": data + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.debug(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + # insert remainder data + if remainder: + nb = remainder + data = get_data_by_payload(schema_payload, nb) + payload = { + "collectionName": collection_name, + "data": data + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + + return schema_payload, data + + def wait_collection_load_completed(self, name): + t0 = time.time() + timeout = 60 + while True and time.time() - t0 < timeout: + rsp = self.collection_client.collection_describe(name) + if 
"data" in rsp and "load" in rsp["data"] and rsp["data"]["load"] == "LoadStateLoaded": + break + else: + time.sleep(5) + + def create_database(self, db_name="default"): + all_db = db.list_database() + logger.info(f"all database: {all_db}") + if db_name not in all_db: + logger.info(f"create database: {db_name}") + try: + db.create_database(db_name=db_name) + except Exception as e: + logger.error(e) + + def update_database(self, db_name="default"): + self.create_database(db_name=db_name) + self.collection_client.db_name = db_name + self.vector_client.db_name = db_name diff --git a/tests/restful_client_v2/config/log_config.py b/tests/restful_client_v2/config/log_config.py new file mode 100644 index 0000000000..d3e3e30d07 --- /dev/null +++ b/tests/restful_client_v2/config/log_config.py @@ -0,0 +1,44 @@ +import os + + +class LogConfig: + def __init__(self): + self.log_debug = "" + self.log_err = "" + self.log_info = "" + self.log_worker = "" + self.get_default_config() + + @staticmethod + def get_env_variable(var="CI_LOG_PATH"): + """ get log path for testing """ + try: + log_path = os.environ[var] + return str(log_path) + except Exception as e: + # now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + log_path = f"/tmp/ci_logs" + print("[get_env_variable] failed to get environment variables : %s, use default path : %s" % (str(e), log_path)) + return log_path + + @staticmethod + def create_path(log_path): + if not os.path.isdir(str(log_path)): + print("[create_path] folder(%s) is not exist." % log_path) + print("[create_path] create path now...") + os.makedirs(log_path) + + def get_default_config(self): + """ Make sure the path exists """ + log_dir = self.get_env_variable() + self.log_debug = "%s/ci_test_log.debug" % log_dir + self.log_info = "%s/ci_test_log.log" % log_dir + self.log_err = "%s/ci_test_log.err" % log_dir + work_log = os.environ.get('PYTEST_XDIST_WORKER') + if work_log is not None: + self.log_worker = f'{log_dir}/{work_log}.log' + + self.create_path(log_dir) + + +log_config = LogConfig() diff --git a/tests/restful_client_v2/conftest.py b/tests/restful_client_v2/conftest.py new file mode 100644 index 0000000000..d4229c53a8 --- /dev/null +++ b/tests/restful_client_v2/conftest.py @@ -0,0 +1,23 @@ +import pytest +import yaml + + +def pytest_addoption(parser): + parser.addoption("--endpoint", action="store", default="http://127.0.0.1:19530", help="endpoint") + parser.addoption("--token", action="store", default="root:Milvus", help="token") + parser.addoption("--minio_host", action="store", default="127.0.0.1", help="minio host") + + +@pytest.fixture +def endpoint(request): + return request.config.getoption("--endpoint") + + +@pytest.fixture +def token(request): + return request.config.getoption("--token") + + +@pytest.fixture +def minio_host(request): + return request.config.getoption("--minio_host") diff --git a/tests/restful_client_v2/pytest.ini b/tests/restful_client_v2/pytest.ini new file mode 100644 index 0000000000..bba8d5e14e --- /dev/null +++ b/tests/restful_client_v2/pytest.ini @@ -0,0 +1,14 @@ +[pytest] +addopts = --strict --endpoint http://127.0.0.1:19530 --token root:Milvus --minio_host 127.0.0.1 + +log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s) +log_date_format = %Y-%m-%d %H:%M:%S + + +filterwarnings = + ignore::DeprecationWarning + +markers = + L0 : 'L0 case, high priority' + L1 : 'L1 case, second priority' + diff --git a/tests/restful_client_v2/requirements.txt b/tests/restful_client_v2/requirements.txt new file mode 
100644 index 0000000000..71e3c8b778 --- /dev/null +++ b/tests/restful_client_v2/requirements.txt @@ -0,0 +1,16 @@ +--extra-index-url https://test.pypi.org/simple/ +requests==2.31.0 +urllib3==1.26.18 +pytest~=7.2.0 +pyyaml~=6.0 +numpy~=1.24.3 +allure-pytest>=2.8.18 +Faker==19.2.0 +pymilvus==2.4.0rc39 +scikit-learn~=1.1.3 +pytest-xdist==2.5.0 +minio==7.1.14 + +# for bf16 datatype +jax==0.4.13 +jaxlib==0.4.13 diff --git a/tests/restful_client_v2/testcases/test_alias_operation.py b/tests/restful_client_v2/testcases/test_alias_operation.py new file mode 100644 index 0000000000..75b47ef498 --- /dev/null +++ b/tests/restful_client_v2/testcases/test_alias_operation.py @@ -0,0 +1,125 @@ +import random +from sklearn import preprocessing +import numpy as np +from utils.utils import gen_collection_name +from utils.util_log import test_log as logger +import pytest +from base.testbase import TestBase + + +@pytest.mark.L0 +class TestAliasE2E(TestBase): + + def test_alias_e2e(self): + """ + """ + # list alias before create + rsp = self.alias_client.list_alias() + name = gen_collection_name() + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{128}"}} + ] + }, + "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}] + } + logger.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + # create alias + alias_name = name + "_alias" + payload = { + "collectionName": name, + "aliasName": alias_name + } + rsp = self.alias_client.create_alias(payload) + assert rsp['code'] == 200 + # list alias after create + rsp = self.alias_client.list_alias() + assert alias_name in rsp['data'] + # describe alias + rsp = self.alias_client.describe_alias(alias_name) + assert rsp['data']["aliasName"] == alias_name + assert rsp['data']["collectionName"] == name + + # do crud operation by alias + # insert data by alias + data = [] + for j in range(3000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "book_intro": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(), + } + data.append(tmp) + + payload = { + "collectionName": alias_name, + "data": data + } + rsp = self.vector_client.vector_insert(payload) + # delete data by alias + payload = { + "collectionName": alias_name, + "ids": [1, 2, 3] + } + rsp = self.vector_client.vector_delete(payload) + + # upsert data by alias + upsert_data = [] + for j in range(100): + tmp = { + "book_id": j, + "word_count": j + 1, + "book_describe": f"book_{j + 2}", + "book_intro": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(), + } + upsert_data.append(tmp) + payload = { + "collectionName": alias_name, + "data": upsert_data + } + rsp = self.vector_client.vector_upsert(payload) + # search data by alias + payload = { + "collectionName": alias_name, + "vector": preprocessing.normalize([np.array([random.random() for i in range(128)])])[0].tolist() + } + rsp = self.vector_client.vector_search(payload) + # query data by alias + payload = { + "collectionName": alias_name, + "filter": "book_id > 10" + } + rsp = 
self.vector_client.vector_query(payload) + + # alter alias to another collection + new_name = gen_collection_name() + payload = { + "collectionName": new_name, + "metricType": "L2", + "dimension": 128, + } + rsp = client.collection_create(payload) + payload = { + "collectionName": new_name, + "aliasName": alias_name + } + rsp = self.alias_client.alter_alias(payload) + # describe alias + rsp = self.alias_client.describe_alias(alias_name) + assert rsp['data']["aliasName"] == alias_name + assert rsp['data']["collectionName"] == new_name + # query data by alias, expect no data + payload = { + "collectionName": alias_name, + "filter": "id > 0" + } + rsp = self.vector_client.vector_query(payload) + assert rsp['data'] == [] diff --git a/tests/restful_client_v2/testcases/test_collection_operations.py b/tests/restful_client_v2/testcases/test_collection_operations.py new file mode 100644 index 0000000000..aae36f081a --- /dev/null +++ b/tests/restful_client_v2/testcases/test_collection_operations.py @@ -0,0 +1,968 @@ +import datetime +import logging +import time +from utils.util_log import test_log as logger +from utils.utils import gen_collection_name +import pytest +from api.milvus import CollectionClient +from base.testbase import TestBase +import threading +from utils.utils import get_data_by_payload +from pymilvus import ( + FieldSchema, CollectionSchema, DataType, + Collection +) + + +@pytest.mark.L0 +class TestCreateCollection(TestBase): + + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + @pytest.mark.parametrize("dim", [128]) + def test_create_collections_fast(self, dim, metric_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + "metricType": metric_type + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) + @pytest.mark.parametrize("dim", [128]) + def test_create_collections_custom_without_index(self, dim, auto_id, enable_dynamic_field, enable_partition_key): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_field, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": enable_partition_key, + "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "image_intro", "dataType": "FloatVector", 
"elementTypeParams": {"dim": f"{dim}"}}, + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + c = Collection(name) + logger.info(f"schema: {c.schema}") + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert rsp['data']['autoId'] == auto_id + assert c.schema.auto_id == auto_id + assert rsp['data']['enableDynamicField'] == enable_dynamic_field + assert c.schema.enable_dynamic_field == enable_dynamic_field + # assert no index created + indexes = rsp['data']['indexes'] + assert len(indexes) == 0 + # assert not loaded + assert rsp['data']['load'] == "LoadStateNotLoad" + for field in rsp['data']['fields']: + if field['name'] == "user_id": + assert field['partitionKey'] == enable_partition_key + for field in c.schema.fields: + if field.name == "user_id": + assert field.is_partition_key == enable_partition_key + + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + @pytest.mark.parametrize("dim", [128]) + def test_create_collections_one_float_vector_with_index(self, dim, metric_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [ + {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}] + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + # describe collection + time.sleep(10) + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + # assert index created + indexes = rsp['data']['indexes'] + assert len(indexes) == len(payload['indexParams']) + # assert load success + assert rsp['data']['load'] == "LoadStateLoaded" + + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + @pytest.mark.parametrize("dim", [128]) + def test_create_collections_multi_float_vector_with_one_index(self, dim, metric_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "image_intro", "dataType": 
"FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [ + {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}] + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 65535 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + # describe collection + time.sleep(10) + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + # assert index created + indexes = rsp['data']['indexes'] + assert len(indexes) == len(payload['indexParams']) + # assert load success + assert rsp['data']['load'] == "LoadStateNotLoad" + + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + @pytest.mark.parametrize("dim", [128]) + def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [ + {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}, + {"fieldName": "image_intro", "indexName": "image_intro_vector", "metricType": f"{metric_type}"}] + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + # describe collection + time.sleep(10) + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + # assert index created + indexes = rsp['data']['indexes'] + assert len(indexes) == len(payload['indexParams']) + # assert load success + assert rsp['data']['load'] == "LoadStateLoaded" + + @pytest.mark.parametrize("auto_id", [True]) + @pytest.mark.parametrize("enable_dynamic_field", [True]) + @pytest.mark.parametrize("enable_partition_key", [True]) + @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_dynamic_field, enable_partition_key, + metric_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_field, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "float16_vector", "dataType": "Float16Vector", + "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "bfloat16_vector", "dataType": "BFloat16Vector", + 
"elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "float16_vector", "indexName": "float16_vector_index", "metricType": f"{metric_type}"}, + {"fieldName": "bfloat16_vector", "indexName": "bfloat16_vector_index", "metricType": f"{metric_type}"}] + + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + c = Collection(name) + logger.info(f"schema: {c.schema}") + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert len(rsp['data']['fields']) == len(c.schema.fields) + + @pytest.mark.parametrize("auto_id", [True]) + @pytest.mark.parametrize("enable_dynamic_field", [True]) + @pytest.mark.parametrize("enable_partition_key", [True]) + @pytest.mark.parametrize("dim", [128]) + @pytest.mark.parametrize("metric_type", ["JACCARD", "HAMMING"]) + def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dynamic_field, enable_partition_key, + metric_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_field, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "binary_vector", "indexName": "binary_vector_index", "metricType": f"{metric_type}"} + ] + + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + + all_collections = rsp['data'] + assert name in all_collections + c = Collection(name) + logger.info(f"schema: {c.schema}") + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert len(rsp['data']['fields']) == len(c.schema.fields) + + def test_create_collections_concurrent_with_same_param(self): + """ + target: test create collection with same param + method: concurrent create collections with same param with multi thread + expected: create collections all success + """ + concurrent_rsp = [] + + def create_collection(c_name, vector_dim, c_metric_type): + collection_payload = { + "collectionName": c_name, + "dimension": vector_dim, + "metricType": c_metric_type, + } + rsp = client.collection_create(collection_payload) + concurrent_rsp.append(rsp) + logger.info(rsp) + + name = gen_collection_name() + dim = 128 + metric_type = "L2" + client = self.collection_client + threads = [] + for i in range(10): + t = threading.Thread(target=create_collection, args=(name, dim, metric_type,)) + threads.append(t) + for t in threads: + t.start() + for t in threads: + t.join() + time.sleep(10) + success_cnt = 0 + for rsp in concurrent_rsp: + if rsp["code"] == 200: + success_cnt += 1 + logger.info(concurrent_rsp) + assert success_cnt == 10 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert 
rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert f"FloatVector({dim})" in str(rsp['data']['fields']) + + def test_create_collections_concurrent_with_different_param(self): + """ + target: test create collection with different param + method: concurrent create collections with different param with multi thread + expected: only one collection can success + """ + concurrent_rsp = [] + + def create_collection(c_name, vector_dim, c_metric_type): + collection_payload = { + "collectionName": c_name, + "dimension": vector_dim, + "metricType": c_metric_type, + } + rsp = client.collection_create(collection_payload) + concurrent_rsp.append(rsp) + logger.info(rsp) + + name = gen_collection_name() + dim = 128 + client = self.collection_client + threads = [] + for i in range(0, 5): + t = threading.Thread(target=create_collection, args=(name, dim + i, "L2",)) + threads.append(t) + for i in range(5, 10): + t = threading.Thread(target=create_collection, args=(name, dim + i, "IP",)) + threads.append(t) + for t in threads: + t.start() + for t in threads: + t.join() + time.sleep(10) + success_cnt = 0 + for rsp in concurrent_rsp: + if rsp["code"] == 200: + success_cnt += 1 + logger.info(concurrent_rsp) + assert success_cnt == 1 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + + +@pytest.mark.L1 +class TestCreateCollectionNegative(TestBase): + + def test_create_collections_custom_with_invalid_datatype(self): + """ + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VARCHAR", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logging.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + assert rsp['code'] == 1100 + + def test_create_collections_with_invalid_api_key(self): + """ + target: test create collection with invalid api key(wrong username and password) + method: create collections with invalid api key + expected: create collection failed + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + client.api_key = "illegal_api_key" + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 1800 + + @pytest.mark.parametrize("name", + [" ", "test_collection_" * 100, "test collection", "test/collection", "test\collection"]) + def test_create_collections_with_invalid_collection_name(self, name): + """ + target: test create collection with invalid collection name + method: create collections with invalid collection name + expected: create collection failed with right error message + """ + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 1 + + +@pytest.mark.L0 +class TestHasCollections(TestBase): + + def test_has_collections_default(self): + """ + target: test list collection with a simple schema + method: create 
collections and list them + expected: created collections are in list + """ + client = self.collection_client + name_list = [] + for i in range(2): + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "metricType": "L2", + "dimension": dim, + } + time.sleep(1) + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + name_list.append(name) + rsp = client.collection_list() + all_collections = rsp['data'] + for name in name_list: + assert name in all_collections + rsp = client.collection_has(collection_name=name) + assert rsp['data']['has'] is True + + def test_has_collections_with_not_exist_name(self): + """ + target: test list collection with a simple schema + method: create collections and list them + expected: created collections are in list + """ + client = self.collection_client + name_list = [] + for i in range(2): + name = gen_collection_name() + name_list.append(name) + rsp = client.collection_list() + all_collections = rsp['data'] + for name in name_list: + assert name not in all_collections + rsp = client.collection_has(collection_name=name) + assert rsp['data']['has'] is False + + +@pytest.mark.L0 +class TestGetCollectionStats(TestBase): + + def test_get_collections_stats(self): + """ + target: test list collection with a simple schema + method: create collections and list them + expected: created collections are in list + """ + client = self.collection_client + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "metricType": "L2", + "dimension": dim, + } + time.sleep(1) + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + # describe collection + client.collection_describe(collection_name=name) + rsp = client.collection_stats(collection_name=name) + assert rsp['code'] == 200 + assert rsp['data']['rowCount'] == 0 + # insert data + nb = 3000 + data = get_data_by_payload(payload, nb) + payload = { + "collectionName": name, + "data": data + } + self.vector_client.vector_insert(payload=payload) + c = Collection(name) + count = c.query(expr="", output_fields=["count(*)"]) + logger.info(f"count: {count}") + c.flush() + rsp = client.collection_stats(collection_name=name) + assert rsp['data']['rowCount'] == nb + + +class TestLoadReleaseCollection(TestBase): + + def test_load_and_release_collection(self): + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + # create index before load + index_params = [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}] + payload = { + "collectionName": name, + "indexParams": index_params + } + rsp = self.index_client.index_create(payload) + + # get load state before load + rsp = client.collection_load_state(collection_name=name) + assert rsp['data']['loadState'] == "LoadStateNotLoad" + + # describe collection + client.collection_describe(collection_name=name) + rsp = client.collection_load(collection_name=name) + assert rsp['code'] == 200 + rsp = client.collection_load_state(collection_name=name) 
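+        # collection load is asynchronous: right after the load request the state may still be
+        # LoadStateLoading, so both states are accepted here before waiting for LoadStateLoaded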
+ assert rsp['data']['loadState'] in ["LoadStateLoaded", "LoadStateLoading"] + time.sleep(5) + rsp = client.collection_load_state(collection_name=name) + assert rsp['data']['loadState'] == "LoadStateLoaded" + + # release collection + rsp = client.collection_release(collection_name=name) + time.sleep(5) + rsp = client.collection_load_state(collection_name=name) + assert rsp['data']['loadState'] == "LoadStateNotLoad" + + +class TestGetCollectionLoadState(TestBase): + + def test_get_collection_load_state(self): + """ + target: test list collection with a simple schema + method: create collections and list them + expected: created collections are in list + """ + client = self.collection_client + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "metricType": "L2", + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + # describe collection + client.collection_describe(collection_name=name) + rsp = client.collection_load_state(collection_name=name) + assert rsp['code'] == 200 + assert rsp['data']['load'] == "LoadStateNotLoad" + # insert data + nb = 3000 + data = get_data_by_payload(payload, nb) + payload = { + "collectionName": name, + "data": data + } + self.vector_client.vector_insert(payload=payload) + rsp = client.collection_load_state(collection_name=name) + assert rsp['data']['load'] == "LoadStateLoading" + time.sleep(10) + rsp = client.collection_load_state(collection_name=name) + assert rsp['data']['load'] == "LoadStateLoaded" + + +@pytest.mark.L0 +class TestListCollections(TestBase): + + def test_list_collections_default(self): + """ + target: test list collection with a simple schema + method: create collections and list them + expected: created collections are in list + """ + client = self.collection_client + name_list = [] + for i in range(2): + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "metricType": "L2", + "dimension": dim, + } + time.sleep(1) + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + name_list.append(name) + rsp = client.collection_list() + all_collections = rsp['data'] + for name in name_list: + assert name in all_collections + + +@pytest.mark.L1 +class TestListCollectionsNegative(TestBase): + def test_list_collections_with_invalid_api_key(self): + """ + target: test list collection with an invalid api key + method: list collection with invalid api key + expected: raise error with right error code and message + """ + client = self.collection_client + name_list = [] + for i in range(2): + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "metricType": "L2", + "dimension": dim, + } + time.sleep(1) + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + name_list.append(name) + client = self.collection_client + client.api_key = "illegal_api_key" + rsp = client.collection_list() + assert rsp['code'] == 1800 + + +@pytest.mark.L0 +class TestDescribeCollection(TestBase): + + def test_describe_collections_default(self): + """ + target: test describe collection with a simple schema + method: describe collection + expected: info of description is same with param passed to create collection + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + "metricType": "L2" + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert 
name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert rsp['data']['autoId'] is True + assert rsp['data']['enableDynamicField'] is True + assert len(rsp['data']['indexes']) == 1 + + def test_describe_collections_custom(self): + """ + target: test describe collection with a simple schema + method: describe collection + expected: info of description is same with param passed to create collection + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + fields = [ + FieldSchema(name='reviewer_id', dtype=DataType.INT64, description="", is_primary=True), + FieldSchema(name='store_address', dtype=DataType.VARCHAR, description="", max_length=512, + is_partition_key=True), + FieldSchema(name='review', dtype=DataType.VARCHAR, description="", max_length=16384), + FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, description="", dim=384, is_index=True), + ] + + schema = CollectionSchema( + fields=fields, + description="", + enable_dynamic_field=True, + # The following is an alternative to setting `is_partition_key` in a field schema. + partition_key_field="store_address" + ) + + collection = Collection( + name=name, + schema=schema, + ) + logger.info(f"schema: {schema}") + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + + for field in rsp['data']['fields']: + if field['name'] == "store_address": + assert field['PartitionKey'] is True + if field['name'] == "reviewer_id": + assert field['primaryKey'] is True + assert rsp['data']['autoId'] is False + assert rsp['data']['enableDynamicField'] is True + + +@pytest.mark.L1 +class TestDescribeCollectionNegative(TestBase): + def test_describe_collections_with_invalid_api_key(self): + """ + target: test describe collection with invalid api key + method: describe collection with invalid api key + expected: raise error with right error code and message + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # describe collection + illegal_client = CollectionClient(self.url, "illegal_api_key") + rsp = illegal_client.collection_describe(name) + assert rsp['code'] == 1800 + + def test_describe_collections_with_invalid_collection_name(self): + """ + target: test describe collection with invalid collection name + method: describe collection with invalid collection name + expected: raise error with right error code and message + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # describe collection + invalid_name = "invalid_name" + rsp = client.collection_describe(invalid_name) + assert rsp['code'] == 1 + + +@pytest.mark.L0 +class TestDropCollection(TestBase): + def test_drop_collections_default(self): + """ + Drop a collection with a simple schema + target: test drop collection with a simple schema + method: drop 
collection + expected: dropped collection was not in collection list + """ + clo_list = [] + for i in range(5): + time.sleep(1) + name = 'test_collection_' + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f_%f") + payload = { + "collectionName": name, + "dimension": 128, + "metricType": "L2" + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + clo_list.append(name) + rsp = self.collection_client.collection_list() + all_collections = rsp['data'] + for name in clo_list: + assert name in all_collections + for name in clo_list: + time.sleep(0.2) + payload = { + "collectionName": name, + } + rsp = self.collection_client.collection_drop(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_list() + all_collections = rsp['data'] + for name in clo_list: + assert name not in all_collections + + +@pytest.mark.L1 +class TestDropCollectionNegative(TestBase): + def test_drop_collections_with_invalid_api_key(self): + """ + target: test drop collection with invalid api key + method: drop collection with invalid api key + expected: raise error with right error code and message; collection still in collection list + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # drop collection + payload = { + "collectionName": name, + } + illegal_client = CollectionClient(self.url, "invalid_api_key") + rsp = illegal_client.collection_drop(payload) + assert rsp['code'] == 1800 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + + def test_drop_collections_with_invalid_collection_name(self): + """ + target: test drop collection with invalid collection name + method: drop collection with invalid collection name + expected: raise error with right error code and message + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # drop collection + invalid_name = "invalid_name" + payload = { + "collectionName": invalid_name, + } + rsp = client.collection_drop(payload) + assert rsp['code'] == 100 + + +@pytest.mark.L0 +class TestRenameCollection(TestBase): + + def test_rename_collection(self): + """ + target: test rename collection + method: rename collection + expected: renamed collection is in collection list + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "metricType": "L2", + "dimension": dim, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + new_name = gen_collection_name() + payload = { + "collectionName": name, + "newCollectionName": new_name, + } + rsp = client.collection_rename(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert new_name in all_collections + assert name not in all_collections diff --git a/tests/restful_client_v2/testcases/test_index_operation.py b/tests/restful_client_v2/testcases/test_index_operation.py new file 
mode 100644 index 0000000000..3ac5ba1360 --- /dev/null +++ b/tests/restful_client_v2/testcases/test_index_operation.py @@ -0,0 +1,303 @@ +import random +from sklearn import preprocessing +import numpy as np +import sys +import json +import time +from utils import constant +from utils.utils import gen_collection_name +from utils.util_log import test_log as logger +import pytest +from base.testbase import TestBase +from utils.utils import gen_vector +from pymilvus import ( + FieldSchema, CollectionSchema, DataType, + Collection +) + + +@pytest.mark.L0 +class TestCreateIndex(TestBase): + + @pytest.mark.parametrize("metric_type", ["L2"]) + @pytest.mark.parametrize("index_type", ["AUTOINDEX", "HNSW"]) + @pytest.mark.parametrize("dim", [128]) + def test_index_e2e(self, dim, metric_type, index_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logger.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + # insert data + for i in range(1): + data = [] + for j in range(3000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "book_intro": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[ + 0].tolist(), + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data + } + rsp = self.vector_client.vector_insert(payload) + c = Collection(name) + c.flush() + # list index, expect empty + rsp = self.index_client.index_list(name) + + # create index + payload = { + "collectionName": name, + "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", + "metricType": f"{metric_type}"}] + } + if index_type == "HNSW": + payload["indexParams"][0]["indexConfig"] = {"index_type": "HNSW", "M": "16", "efConstruction": "200"} + if index_type == "AUTOINDEX": + payload["indexParams"][0]["indexConfig"] = {"index_type": "AUTOINDEX"} + rsp = self.index_client.index_create(payload) + assert rsp['code'] == 200 + time.sleep(10) + # list index, expect not empty + rsp = self.index_client.index_list(collection_name=name) + # describe index + rsp = self.index_client.index_describe(collection_name=name, index_name="book_intro_vector") + assert rsp['code'] == 200 + assert len(rsp['data']) == len(payload['indexParams']) + expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) + actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) + for i in range(len(expected_index)): + assert expected_index[i]['fieldName'] == actual_index[i]['fieldName'] + assert expected_index[i]['indexName'] == actual_index[i]['indexName'] + assert expected_index[i]['metricType'] == actual_index[i]['metricType'] + assert expected_index[i]["indexConfig"]['index_type'] == actual_index[i]['indexType'] + + # drop index + for i in range(len(actual_index)): + payload = { + "collectionName": name, + "indexName": actual_index[i]['indexName'] + } + rsp = self.index_client.index_drop(payload) + assert 
rsp['code'] == 200 + # list index, expect empty + rsp = self.index_client.index_list(collection_name=name) + assert rsp['data'] == [] + + @pytest.mark.parametrize("index_type", ["INVERTED"]) + @pytest.mark.parametrize("dim", [128]) + def test_index_for_scalar_field(self, dim, index_type): + """ + target: test create an INVERTED index on a scalar field + method: create a collection, insert data and create an INVERTED index on a scalar field + expected: index is created and its description matches the index params + """ + name = gen_collection_name() + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logger.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + # insert data + for i in range(1): + data = [] + for j in range(3000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "book_intro": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[ + 0].tolist(), + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data + } + rsp = self.vector_client.vector_insert(payload) + c = Collection(name) + c.flush() + # list index, expect empty + rsp = self.index_client.index_list(name) + + # create index + payload = { + "collectionName": name, + "indexParams": [{"fieldName": "word_count", "indexName": "word_count_vector", + "indexConfig": {"index_type": "INVERTED"}}] + } + rsp = self.index_client.index_create(payload) + assert rsp['code'] == 200 + time.sleep(10) + # list index, expect not empty + rsp = self.index_client.index_list(collection_name=name) + # describe index + rsp = self.index_client.index_describe(collection_name=name, index_name="word_count_vector") + assert rsp['code'] == 200 + assert len(rsp['data']) == len(payload['indexParams']) + expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) + actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) + for i in range(len(expected_index)): + assert expected_index[i]['fieldName'] == actual_index[i]['fieldName'] + assert expected_index[i]['indexName'] == actual_index[i]['indexName'] + assert expected_index[i]['indexConfig']['index_type'] == actual_index[i]['indexType'] + + @pytest.mark.parametrize("index_type", ["BIN_FLAT", "BIN_IVF_FLAT"]) + @pytest.mark.parametrize("metric_type", ["JACCARD", "HAMMING"]) + @pytest.mark.parametrize("dim", [128]) + def test_index_for_binary_vector_field(self, dim, metric_type, index_type): + """ + target: test create index on a binary vector field + method: create a collection with a binary vector field, insert data and create an index + expected: index is created and its description matches the index params + """ + name = gen_collection_name() + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logger.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + # 
insert data + for i in range(1): + data = [] + for j in range(3000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data + } + rsp = self.vector_client.vector_insert(payload) + c = Collection(name) + c.flush() + # list index, expect empty + rsp = self.index_client.index_list(name) + + # create index + index_name = "binary_vector_index" + payload = { + "collectionName": name, + "indexParams": [{"fieldName": "binary_vector", "indexName": index_name, "metricType": metric_type, + "indexConfig": {"index_type": index_type}}] + } + if index_type == "BIN_IVF_FLAT": + payload["indexParams"][0]["indexConfig"]["nlist"] = "16384" + rsp = self.index_client.index_create(payload) + assert rsp['code'] == 200 + time.sleep(10) + # list index, expect not empty + rsp = self.index_client.index_list(collection_name=name) + # describe index + rsp = self.index_client.index_describe(collection_name=name, index_name=index_name) + assert rsp['code'] == 200 + assert len(rsp['data']) == len(payload['indexParams']) + expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName']) + actual_index = sorted(rsp['data'], key=lambda x: x['fieldName']) + for i in range(len(expected_index)): + assert expected_index[i]['fieldName'] == actual_index[i]['fieldName'] + assert expected_index[i]['indexName'] == actual_index[i]['indexName'] + assert expected_index[i]['indexConfig']['index_type'] == actual_index[i]['indexType'] + + +@pytest.mark.L0 +class TestCreateIndexNegative(TestBase): + + @pytest.mark.parametrize("index_type", ["BIN_FLAT", "BIN_IVF_FLAT"]) + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + @pytest.mark.parametrize("dim", [128]) + def test_index_for_binary_vector_field_with_mismatch_metric_type(self, dim, metric_type, index_type): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + } + } + logger.info(f"create collection {name} with payload: {payload}") + rsp = client.collection_create(payload) + # insert data + for i in range(1): + data = [] + for j in range(3000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data + } + rsp = self.vector_client.vector_insert(payload) + c = Collection(name) + c.flush() + # list index, expect empty + rsp = self.index_client.index_list(name) + + # create index + index_name = "binary_vector_index" + payload = { + "collectionName": name, + "indexParams": [{"fieldName": "binary_vector", "indexName": index_name, "metricType": metric_type, + "indexConfig": {"index_type": index_type}}] + } + if index_type == "BIN_IVF_FLAT": + payload["indexParams"][0]["indexConfig"]["nlist"] = "16384" + rsp = self.index_client.index_create(payload) + assert 
rsp['code'] == 65535 diff --git a/tests/restful_client_v2/testcases/test_jobs_operation.py b/tests/restful_client_v2/testcases/test_jobs_operation.py new file mode 100644 index 0000000000..231ba856a2 --- /dev/null +++ b/tests/restful_client_v2/testcases/test_jobs_operation.py @@ -0,0 +1,81 @@ +import random +import json +import time +from utils.utils import gen_collection_name +import pytest +from base.testbase import TestBase + + +@pytest.mark.L0 +class TestJobE2E(TestBase): + + def test_job_e2e(self): + # create collection + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}] + } + rsp = self.collection_client.collection_create(payload) + + # upload file to storage + data = [{ + "book_id": i, + "word_count": i, + "book_describe": f"book_{i}", + "book_intro": [random.random() for _ in range(dim)]} + for i in range(10000)] + + # dump data to file + file_name = "bulk_insert_data.json" + file_path = f"/tmp/{file_name}" + with open(file_path, "w") as f: + json.dump(data, f) + # upload file to minio storage + self.storage_client.upload_file(file_path, file_name) + + # create import job + payload = { + "collectionName": name, + "files": [file_name], + } + rsp = self.import_job_client.create_import_jobs(payload) + # list import job + payload = { + "collectionName": name, + } + rsp = self.import_job_client.list_import_jobs(payload) + + # get import job progress + for task in rsp['data']: + task_id = task['taskID'] + finished = False + t0 = time.time() + + while not finished: + rsp = self.import_job_client.get_import_job_progress(task_id) + if rsp['data']['state'] == "ImportCompleted": + finished = True + time.sleep(5) + if time.time() - t0 > 120: + assert False, "import job timeout" + time.sleep(10) + # query data + payload = { + "collectionName": name, + "filter": f"book_id in {[i for i in range(1000)]}", + "limit": 100, + "offset": 0, + "outputFields": ["*"] + } + rsp = self.vector_client.vector_query(payload) + assert len(rsp['data']) == 100 diff --git a/tests/restful_client_v2/testcases/test_partition_operation.py b/tests/restful_client_v2/testcases/test_partition_operation.py new file mode 100644 index 0000000000..1302289554 --- /dev/null +++ b/tests/restful_client_v2/testcases/test_partition_operation.py @@ -0,0 +1,124 @@ +import random +from sklearn import preprocessing +import numpy as np +from utils.utils import gen_collection_name +import pytest +from base.testbase import TestBase +from pymilvus import ( + Collection +) + + +@pytest.mark.L0 +class TestPartitionE2E(TestBase): + + def test_partition_e2e(self): + """ + target: test create collection + method: create a collection with a simple schema + expected: create collection success + """ + name = gen_collection_name() + dim = 128 + metric_type = "L2" + client = self.collection_client + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", 
"elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [ + {"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + # describe collection + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + # insert data to default partition + data = [] + for j in range(3000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "book_intro": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + # create partition + partition_name = "test_partition" + rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name) + assert rsp['code'] == 200 + # insert data to partition + data = [] + for j in range(3000, 6000): + tmp = { + "book_id": j, + "word_count": j, + "book_describe": f"book_{j}", + "book_intro": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "partitionName": partition_name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + + # create partition again + rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name) + # list partitions + rsp = self.partition_client.partition_list(collection_name=name) + assert rsp['code'] == 200 + assert partition_name in rsp['data'] + # has partition + rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name) + assert rsp['code'] == 200 + assert rsp['data']["has"] is True + # flush and get partition statistics + c = Collection(name=name) + c.flush() + rsp = self.partition_client.partition_stats(collection_name=name, partition_name=partition_name) + assert rsp['code'] == 200 + assert rsp['data']['rowCount'] == 3000 + + # release partition + rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) + assert rsp['code'] == 200 + # release partition again + rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) + assert rsp['code'] == 200 + # load partition + rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name]) + assert rsp['code'] == 200 + # load partition again + rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name]) + assert rsp['code'] == 200 + # drop partition when it is loaded + rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name) + assert rsp['code'] == 65535 + # drop partition after release + rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name]) + rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name) + assert rsp['code'] == 200 + # has partition + rsp = 
self.partition_client.partition_has(collection_name=name, partition_name=partition_name) + assert rsp['code'] == 200 + assert rsp['data']["has"] is False diff --git a/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py b/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py new file mode 100644 index 0000000000..b2f09bcbfb --- /dev/null +++ b/tests/restful_client_v2/testcases/test_restful_sdk_mix_use_scenario.py @@ -0,0 +1,315 @@ +import random +import time +from utils.utils import gen_collection_name +from utils.util_log import test_log as logger +import pytest +from base.testbase import TestBase +from pymilvus import ( + FieldSchema, CollectionSchema, DataType, + Collection +) + + +@pytest.mark.L0 +class TestRestfulSdkCompatibility(TestBase): + + @pytest.mark.parametrize("dim", [128, 256]) + @pytest.mark.parametrize("enable_dynamic", [True, False]) + @pytest.mark.parametrize("shard_num", [1, 2]) + def test_collection_created_by_sdk_describe_by_restful(self, dim, enable_dynamic, shard_num): + """ + """ + # 1. create collection by sdk + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=enable_dynamic) + collection = Collection(name=name, schema=default_schema, shards_num=shard_num) + logger.info(collection.schema) + # 2. use restful to get collection info + client = self.collection_client + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert rsp['data']['enableDynamicField'] == enable_dynamic + assert rsp['data']['load'] == "LoadStateNotLoad" + assert rsp['data']['shardsNum'] == shard_num + + @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"]) + @pytest.mark.parametrize("dim", [128]) + def test_collection_created_by_restful_describe_by_sdk(self, dim, metric_type): + """ + """ + name = gen_collection_name() + dim = 128 + client = self.collection_client + payload = { + "collectionName": name, + "dimension": dim, + "metricType": metric_type, + } + rsp = client.collection_create(payload) + assert rsp['code'] == 200 + collection = Collection(name=name) + logger.info(collection.schema) + field_names = [field.name for field in collection.schema.fields] + assert len(field_names) == 2 + assert collection.schema.enable_dynamic_field is True + assert len(collection.indexes) > 0 + + @pytest.mark.parametrize("metric_type", ["L2", "IP"]) + def test_collection_created_index_by_sdk_describe_by_restful(self, metric_type): + """ + """ + # 1. 
create collection by sdk + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=True) + collection = Collection(name=name, schema=default_schema) + # create index by sdk + index_param = {"metric_type": metric_type, "index_type": "IVF_FLAT", "params": {"nlist": 128}} + collection.create_index(field_name="float_vector", index_params=index_param) + # 2. use restful to get collection info + client = self.collection_client + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + rsp = client.collection_describe(name) + assert rsp['code'] == 200 + assert rsp['data']['collectionName'] == name + assert len(rsp['data']['indexes']) == 1 and rsp['data']['indexes'][0]['metricType'] == metric_type + + @pytest.mark.parametrize("metric_type", ["L2", "IP"]) + def test_collection_load_by_sdk_describe_by_restful(self, metric_type): + """ + """ + # 1. create collection by sdk + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=True) + collection = Collection(name=name, schema=default_schema) + # create index by sdk + index_param = {"metric_type": metric_type, "index_type": "IVF_FLAT", "params": {"nlist": 128}} + collection.create_index(field_name="float_vector", index_params=index_param) + collection.load() + # 2. use restful to get collection info + client = self.collection_client + rsp = client.collection_list() + all_collections = rsp['data'] + assert name in all_collections + rsp = client.collection_describe(name) + assert rsp['data']['load'] == "LoadStateLoaded" + + def test_collection_create_by_sdk_insert_vector_by_restful(self): + """ + """ + # 1. 
create collection by sdk + dim = 128 + nb = 100 + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=True) + collection = Collection(name=name, schema=default_schema) + # create index by sdk + index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}} + collection.create_index(field_name="float_vector", index_params=index_param) + collection.load() + # insert data by restful + data = [ + {"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i} + for i in range(nb) + ] + client = self.vector_client + payload = { + "collectionName": name, + "data": data, + } + rsp = client.vector_insert(payload) + assert rsp['code'] == 200 + assert rsp['data']['insertCount'] == nb + + def test_collection_create_by_sdk_search_vector_by_restful(self): + """ + """ + dim = 128 + nb = 100 + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=True) + # init collection by sdk + collection = Collection(name=name, schema=default_schema) + index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}} + collection.create_index(field_name="float_vector", index_params=index_param) + collection.load() + data = [ + {"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i} + for i in range(nb) + ] + collection.insert(data) + client = self.vector_client + payload = { + "collectionName": name, + "vector": [random.random() for _ in range(dim)], + "limit": 10 + } + # search data by restful + rsp = client.vector_search(payload) + assert rsp['code'] == 200 + assert len(rsp['data']) == 10 + + def test_collection_create_by_sdk_query_vector_by_restful(self): + """ + """ + dim = 128 + nb = 100 + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=True) + # init collection by sdk + collection = Collection(name=name, schema=default_schema) + index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}} + collection.create_index(field_name="float_vector", index_params=index_param) + collection.load() + data = [ + {"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i} + for i in range(nb) + ] + collection.insert(data) + client = self.vector_client + payload = { + "collectionName": name, + "filter": "int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", + } + # query data by restful + rsp = 
client.vector_query(payload) + assert rsp['code'] == 200 + assert len(rsp['data']) == 10 + + def test_collection_create_by_restful_search_vector_by_sdk(self): + """ + """ + name = gen_collection_name() + dim = 128 + # insert data by restful + self.init_collection(name, metric_type="L2", dim=dim) + time.sleep(5) + # search data by sdk + collection = Collection(name=name) + nq = 5 + vectors_to_search = [[random.random() for i in range(dim)] for j in range(nq)] + res = collection.search(data=vectors_to_search, anns_field="vector", param={}, limit=10) + assert len(res) == nq + assert len(res[0]) == 10 + + def test_collection_create_by_restful_query_vector_by_sdk(self): + """ + """ + name = gen_collection_name() + dim = 128 + # insert data by restful + self.init_collection(name, metric_type="L2", dim=dim) + time.sleep(5) + # query data by sdk + collection = Collection(name=name) + res = collection.query(expr=f"uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"]) + for item in res: + uid = item["uid"] + assert uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + + def test_collection_create_by_restful_delete_vector_by_sdk(self): + """ + """ + name = gen_collection_name() + dim = 128 + # insert data by restful + self.init_collection(name, metric_type="L2", dim=dim) + time.sleep(5) + # query data by sdk + collection = Collection(name=name) + res = collection.query(expr=f"uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"]) + pk_id_list = [] + for item in res: + uid = item["uid"] + pk_id_list.append(item["id"]) + expr = f"id in {pk_id_list}" + collection.delete(expr) + time.sleep(5) + res = collection.query(expr=f"uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"]) + assert len(res) == 0 + + def test_collection_create_by_sdk_delete_vector_by_restful(self): + """ + """ + dim = 128 + nb = 100 + name = gen_collection_name() + default_fields = [ + FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="float", dtype=DataType.FLOAT), + FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535), + FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128) + ] + default_schema = CollectionSchema(fields=default_fields, description="test collection", + enable_dynamic_field=True) + # init collection by sdk + collection = Collection(name=name, schema=default_schema) + index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}} + collection.create_index(field_name="float_vector", index_params=index_param) + collection.load() + data = [ + {"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i} + for i in range(nb) + ] + collection.insert(data) + time.sleep(5) + res = collection.query(expr=f"int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"]) + pk_id_list = [] + for item in res: + pk_id_list.append(item["int64"]) + payload = { + "collectionName": name, + "id": pk_id_list + } + # delete data by restful + rsp = self.vector_client.vector_delete(payload) + time.sleep(5) + res = collection.query(expr=f"int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"]) + assert len(res) == 0 diff --git a/tests/restful_client_v2/testcases/test_role_operation.py b/tests/restful_client_v2/testcases/test_role_operation.py new file mode 100644 index 0000000000..08a7e55a99 --- /dev/null +++ b/tests/restful_client_v2/testcases/test_role_operation.py @@ -0,0 +1,79 @@ +from utils.utils import gen_unique_str +from base.testbase import TestBase + + +class TestRoleE2E(TestBase): 
+ + def teardown_method(self): + # because role num is limited, so we need to delete all roles after test + rsp = self.role_client.role_list() + all_roles = rsp['data'] + # delete all roles except default roles + for role in all_roles: + if role.startswith("role"): + payload = { + "roleName": role + } + # revoke privilege from role + rsp = self.role_client.role_describe(role) + for d in rsp['data']: + payload = { + "roleName": role, + "objectType": d['objectType'], + "objectName": d['objectName'], + "privilege": d['privilege'] + } + self.role_client.role_revoke(payload) + self.role_client.role_drop(payload) + + def test_role_e2e(self): + + # list role before create + rsp = self.role_client.role_list() + # create role + role_name = gen_unique_str("role") + payload = { + "roleName": role_name, + } + rsp = self.role_client.role_create(payload) + # list role after create + rsp = self.role_client.role_list() + assert role_name in rsp['data'] + # describe role + rsp = self.role_client.role_describe(role_name) + # grant privilege to role + payload = { + "roleName": role_name, + "objectType": "Global", + "objectName": "*", + "privilege": "CreateCollection" + } + rsp = self.role_client.role_grant(payload) + # describe role after grant + rsp = self.role_client.role_describe(role_name) + privileges = [] + for p in rsp['data']: + privileges.append(p['privilege']) + assert "CreateCollection" in privileges + # revoke privilege from role + payload = { + "roleName": role_name, + "objectType": "Global", + "objectName": "*", + "privilege": "CreateCollection" + } + rsp = self.role_client.role_revoke(payload) + # describe role after revoke + rsp = self.role_client.role_describe(role_name) + privileges = [] + for p in rsp['data']: + privileges.append(p['privilege']) + assert "CreateCollection" not in privileges + # drop role + payload = { + "roleName": role_name + } + rsp = self.role_client.role_drop(payload) + rsp = self.role_client.role_list() + assert role_name not in rsp['data'] + diff --git a/tests/restful_client_v2/testcases/test_user_operation.py b/tests/restful_client_v2/testcases/test_user_operation.py new file mode 100644 index 0000000000..e6d421984e --- /dev/null +++ b/tests/restful_client_v2/testcases/test_user_operation.py @@ -0,0 +1,137 @@ +from utils.utils import gen_collection_name, gen_unique_str +import pytest +from base.testbase import TestBase +from pymilvus import (connections) + + +@pytest.mark.L0 +class TestUserE2E(TestBase): + + def test_user_e2e(self): + # list user before create + + rsp = self.user_client.user_list() + # create user + user_name = gen_unique_str("user") + password = "1234578" + payload = { + "userName": user_name, + "password": password + } + rsp = self.user_client.user_create(payload) + # list user after create + rsp = self.user_client.user_list() + assert user_name in rsp['data'] + # describe user + rsp = self.user_client.user_describe(user_name) + + # update user password + new_password = "87654321" + payload = { + "userName": user_name, + "password": password, + "newPassword": new_password + } + rsp = self.user_client.user_password_update(payload) + assert rsp['code'] == 200 + # drop user + payload = { + "userName": user_name + } + rsp = self.user_client.user_drop(payload) + + rsp = self.user_client.user_list() + assert user_name not in rsp['data'] + + def test_user_binding_role(self): + # create user + user_name = gen_unique_str("user") + password = "12345678" + payload = { + "userName": user_name, + "password": password + } + rsp = 
self.user_client.user_create(payload) + # list user after create + rsp = self.user_client.user_list() + assert user_name in rsp['data'] + # create role + role_name = gen_unique_str("role") + payload = { + "roleName": role_name, + } + rsp = self.role_client.role_create(payload) + # privilege to role + payload = { + "roleName": role_name, + "objectType": "Global", + "objectName": "*", + "privilege": "All" + } + rsp = self.role_client.role_grant(payload) + # bind role to user + payload = { + "userName": user_name, + "roleName": role_name + } + rsp = self.user_client.user_grant(payload) + # describe user roles + rsp = self.user_client.user_describe(user_name) + + # test user has privilege with pymilvus + uri = self.user_client.endpoint + connections.connect(alias="test", uri=f"{uri}", token=f"{user_name}:{password}") + + # create collection with user + collection_name = gen_collection_name() + payload = { + "collectionName": collection_name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": "128"}} + ] + } + } + self.collection_client.api_key = f"{user_name}:{password}" + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + + +@pytest.mark.L0 +class TestUserNegative(TestBase): + + def test_create_user_with_short_password(self): + # list user before create + + rsp = self.user_client.user_list() + # create user + user_name = gen_unique_str("user") + password = "1234" + payload = { + "userName": user_name, + "password": password + } + rsp = self.user_client.user_create(payload) + assert rsp['code'] == 1100 + + def test_create_user_twice(self): + # list user before create + + rsp = self.user_client.user_list() + # create user + user_name = gen_unique_str("user") + password = "12345678" + payload = { + "userName": user_name, + "password": password + } + for i in range(2): + rsp = self.user_client.user_create(payload) + if i == 0: + assert rsp['code'] == 200 + else: + assert rsp['code'] == 65535 + assert "user already exists" in rsp['message'] diff --git a/tests/restful_client_v2/testcases/test_vector_operations.py b/tests/restful_client_v2/testcases/test_vector_operations.py new file mode 100644 index 0000000000..ec2e6fcf06 --- /dev/null +++ b/tests/restful_client_v2/testcases/test_vector_operations.py @@ -0,0 +1,1240 @@ +import random +from sklearn import preprocessing +import numpy as np +import sys +import json +import time +from utils import constant +from utils.utils import gen_collection_name +from utils.util_log import test_log as logger +import pytest +from base.testbase import TestBase +from utils.utils import (get_data_by_payload, get_common_fields_by_data, gen_vector) +from pymilvus import ( + Collection, utility +) + + +@pytest.mark.L0 +class TestInsertVector(TestBase): + + @pytest.mark.parametrize("insert_round", [3]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + def test_insert_entities_with_simple_payload(self, nb, dim, insert_round): + """ + Insert a vector with a simple payload + """ + # create a collection + name = gen_collection_name() + collection_payload = { + "collectionName": name, + "dimension": dim, + "metricType": "L2" + } + rsp = 
self.collection_client.collection_create(collection_payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 200 + # insert data + for i in range(insert_round): + data = get_data_by_payload(collection_payload, nb) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + assert rsp['data']['insertCount'] == nb + + @pytest.mark.parametrize("insert_round", [1]) + @pytest.mark.parametrize("auto_id", [True, False]) + @pytest.mark.parametrize("is_partition_key", [True, False]) + @pytest.mark.parametrize("enable_dynamic_schema", [True, False]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + def test_insert_entities_with_all_scalar_datatype(self, nb, dim, insert_round, auto_id, + is_partition_key, enable_dynamic_schema): + """ + Insert a vector with a simple payload + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_schema, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key, + "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "bool", "dataType": "Bool", "elementTypeParams": {}}, + {"fieldName": "json", "dataType": "JSON", "elementTypeParams": {}}, + {"fieldName": "int_array", "dataType": "Array", "elementDataType": "Int64", + "elementTypeParams": {"max_capacity": "1024"}}, + {"fieldName": "varchar_array", "dataType": "Array", "elementDataType": "VarChar", + "elementTypeParams": {"max_capacity": "1024", "max_length": "256"}}, + {"fieldName": "bool_array", "dataType": "Array", "elementDataType": "Bool", + "elementTypeParams": {"max_capacity": "1024"}}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "image_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "text_emb", "indexName": "text_emb", "metricType": "L2"}, + {"fieldName": "image_emb", "indexName": "image_emb", "metricType": "L2"} + ] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 200 + # insert data + for i in range(insert_round): + data = [] + for i in range(nb): + if auto_id: + tmp = { + "user_id": i, + "word_count": i, + "book_describe": f"book_{i}", + "bool": random.choice([True, False]), + "json": {"key": i}, + "int_array": [i], + "varchar_array": [f"varchar_{i}"], + "bool_array": [random.choice([True, False])], + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[ + 0].tolist(), + "image_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[ + 0].tolist(), + } + else: + tmp = { + "book_id": i, + "user_id": i, + "word_count": i, + "book_describe": f"book_{i}", + "bool": random.choice([True, False]), + "json": {"key": i}, + "int_array": [i], + "varchar_array": 
[f"varchar_{i}"], + "bool_array": [random.choice([True, False])], + "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[ + 0].tolist(), + "image_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[ + 0].tolist(), + } + if enable_dynamic_schema: + tmp.update({f"dynamic_field_{i}": i}) + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + assert rsp['data']['insertCount'] == nb + # query data to make sure the data is inserted + rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) + assert rsp['code'] == 200 + assert len(rsp['data']) == 50 + + @pytest.mark.parametrize("insert_round", [1]) + @pytest.mark.parametrize("auto_id", [True]) + @pytest.mark.parametrize("is_partition_key", [True]) + @pytest.mark.parametrize("enable_dynamic_schema", [True]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + def test_insert_entities_with_all_vector_datatype(self, nb, dim, insert_round, auto_id, + is_partition_key, enable_dynamic_schema): + """ + Insert a vector with a simple payload + """ + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "autoId": auto_id, + "enableDynamicField": enable_dynamic_schema, + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": is_partition_key, + "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "float_vector", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "float16_vector", "dataType": "Float16Vector", + "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "bfloat16_vector", "dataType": "BFloat16Vector", + "elementTypeParams": {"dim": f"{dim}"}}, + {"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}}, + ] + }, + "indexParams": [ + {"fieldName": "float_vector", "indexName": "float_vector", "metricType": "L2"}, + {"fieldName": "float16_vector", "indexName": "float16_vector", "metricType": "L2"}, + {"fieldName": "bfloat16_vector", "indexName": "bfloat16_vector", "metricType": "L2"}, + {"fieldName": "binary_vector", "indexName": "binary_vector", "metricType": "HAMMING", + "indexConfig": {"index_type": "BIN_IVF_FLAT", "nlist": "512"}} + ] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 200 + # insert data + for i in range(insert_round): + data = [] + for i in range(nb): + if auto_id: + tmp = { + "user_id": i, + "word_count": i, + "book_describe": f"book_{i}", + "float_vector": gen_vector(datatype="FloatVector", dim=dim), + "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), + "bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) + } + else: + tmp = { + "book_id": i, + "user_id": i, + "word_count": i, + "book_describe": f"book_{i}", + "float_vector": gen_vector(datatype="FloatVector", dim=dim), + "float16_vector": gen_vector(datatype="Float16Vector", dim=dim), + 
"bfloat16_vector": gen_vector(datatype="BFloat16Vector", dim=dim), + "binary_vector": gen_vector(datatype="BinaryVector", dim=dim) + } + if enable_dynamic_schema: + tmp.update({f"dynamic_field_{i}": i}) + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + assert rsp['data']['insertCount'] == nb + c = Collection(name) + res = c.query( + expr="user_id > 0", + limit=1, + output_fields=["*"], + ) + logger.info(f"res: {res}") + # query data to make sure the data is inserted + rsp = self.vector_client.vector_query({"collectionName": name, "filter": "user_id > 0", "limit": 50}) + assert rsp['code'] == 200 + assert len(rsp['data']) == 50 + + +@pytest.mark.L1 +class TestInsertVectorNegative(TestBase): + def test_insert_vector_with_invalid_api_key(self): + """ + Insert a vector with invalid api key + """ + # create a collection + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + assert rsp['code'] == 200 + # insert data + nb = 10 + data = [ + { + "vector": [np.float64(random.random()) for _ in range(dim)], + } for _ in range(nb) + ] + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + client = self.vector_client + client.api_key = "invalid_api_key" + rsp = client.vector_insert(payload) + assert rsp['code'] == 1800 + + def test_insert_vector_with_invalid_collection_name(self): + """ + Insert a vector with an invalid collection name + """ + + # create a collection + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + assert rsp['code'] == 200 + # insert data + nb = 100 + data = get_data_by_payload(payload, nb) + payload = { + "collectionName": "invalid_collection_name", + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 1 + + def test_insert_vector_with_invalid_database_name(self): + """ + Insert a vector with an invalid database name + """ + # create a collection + name = gen_collection_name() + dim = 128 + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + assert rsp['code'] == 200 + # insert data + nb = 10 + data = get_data_by_payload(payload, nb) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + success = False + rsp = self.vector_client.vector_insert(payload, db_name="invalid_database") + assert rsp['code'] == 800 + + def test_insert_vector_with_mismatch_dim(self): + """ + Insert a vector with mismatch dim + """ + # create a collection + name = gen_collection_name() + dim = 32 + payload = { + "collectionName": name, + "dimension": dim, + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = 
self.collection_client.collection_describe(name) + assert rsp['code'] == 200 + # insert data + nb = 1 + data = [ + { + "vector": [np.float64(random.random()) for _ in range(dim + 1)], + } for i in range(nb) + ] + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 1804 + assert rsp['message'] == "fail to deal the insert data" + + +class TestUpsertVector(TestBase): + + @pytest.mark.parametrize("insert_round", [2]) + @pytest.mark.parametrize("nb", [3000]) + @pytest.mark.parametrize("dim", [128]) + def test_upsert_vector(self, nb, dim, insert_round): + # create a collection + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 200 + # insert data + for i in range(insert_round): + data = [] + for j in range(nb): + tmp = { + "book_id": i * nb + j, + "user_id": i * nb + j, + "word_count": i * nb + j, + "book_describe": f"book_{i * nb + j}", + "text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + assert rsp['data']['insertCount'] == nb + c = Collection(name) + c.flush() + + # upsert data + for i in range(insert_round): + data = [] + for j in range(nb): + tmp = { + "book_id": i * nb + j, + "user_id": i * nb + j + 1, + "word_count": i * nb + j + 2, + "book_describe": f"book_{i * nb + j + 3}", + "text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_upsert(payload) + # query data to make sure the data is updated + rsp = self.vector_client.vector_query({"collectionName": name, "filter": "book_id > 0"}) + for data in rsp['data']: + assert data['user_id'] == data['book_id'] + 1 + assert data['word_count'] == data['book_id'] + 2 + assert data['book_describe'] == f"book_{data['book_id'] + 3}" + res = utility.get_query_segment_info(name) + logger.info(f"res: {res}") + + +@pytest.mark.L0 +class TestSearchVector(TestBase): + + @pytest.mark.parametrize("metric_type", ["IP", "L2"]) + def test_search_vector_with_simple_payload(self, metric_type): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + 
self.init_collection(name, metric_type=metric_type) + + # search data + dim = 128 + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + payload = { + "collectionName": name, + "vector": vector_to_search, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + limit = int(payload.get("limit", 100)) + assert len(res) == limit + ids = [item['id'] for item in res] + assert len(ids) == len(set(ids)) + distance = [item['distance'] for item in res] + if metric_type == "L2": + assert distance == sorted(distance) + if metric_type == "IP": + assert distance == sorted(distance, reverse=True) + + @pytest.mark.parametrize("sum_limit_offset", [16384, 16385]) + @pytest.mark.xfail(reason="") + def test_search_vector_with_exceed_sum_limit_offset(self, sum_limit_offset): + """ + Search a vector with a simple payload + """ + max_search_sum_limit_offset = constant.MAX_SUM_OFFSET_AND_LIMIT + name = gen_collection_name() + self.name = name + nb = sum_limit_offset + 2000 + metric_type = "IP" + limit = 100 + self.init_collection(name, metric_type=metric_type, nb=nb, batch_size=2000) + + # search data + dim = 128 + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + payload = { + "collectionName": name, + "vector": vector_to_search, + "limit": limit, + "offset": sum_limit_offset - limit, + } + rsp = self.vector_client.vector_search(payload) + if sum_limit_offset > max_search_sum_limit_offset: + assert rsp['code'] == 65535 + return + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + limit = int(payload.get("limit", 100)) + assert len(res) == limit + ids = [item['id'] for item in res] + assert len(ids) == len(set(ids)) + distance = [item['distance'] for item in res] + if metric_type == "L2": + assert distance == sorted(distance) + if metric_type == "IP": + assert distance == sorted(distance, reverse=True) + + @pytest.mark.parametrize("level", [0, 1, 2]) + @pytest.mark.parametrize("offset", [0, 10, 100]) + @pytest.mark.parametrize("limit", [1, 100]) + @pytest.mark.parametrize("metric_type", ["L2", "IP"]) + def test_search_vector_with_complex_payload(self, limit, offset, level, metric_type): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + nb = limit + offset + 100 + dim = 128 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb, metric_type=metric_type) + vector_field = schema_payload.get("vectorField") + # search data + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field]) + payload = { + "collectionName": name, + "vector": vector_to_search, + "outputFields": output_fields, + "filter": "uid >= 0", + "limit": limit, + "offset": offset, + } + rsp = self.vector_client.vector_search(payload) + if offset + limit > constant.MAX_SUM_OFFSET_AND_LIMIT: + assert rsp['code'] == 90126 + return + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + assert len(res) == limit + for item in res: + assert item.get("uid") >= 0 + for field in output_fields: + assert field in item + + @pytest.mark.parametrize("filter_expr", ["uid >= 0", "uid >= 0 and uid < 100", "uid in [1,2,3]"]) + def test_search_vector_with_complex_int_filter(self, filter_expr): + """ + Search a vector with a simple 
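The ordering assertions in the search tests encode the expected ranking behaviour: smaller L2 distance is better, larger IP score is better, so results should come back best-first. A small helper capturing that check (the function name is illustrative):

```python
def distances_are_ranked(distances: list[float], metric_type: str) -> bool:
    """Best-first ordering: ascending for L2, descending for IP."""
    if metric_type == "L2":
        return distances == sorted(distances)
    if metric_type == "IP":
        return distances == sorted(distances, reverse=True)
    raise ValueError(f"unsupported metric type: {metric_type}")

assert distances_are_ranked([0.1, 0.4, 0.9], "L2")
assert distances_are_ranked([0.9, 0.4, 0.1], "IP")
```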
payload + """ + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + limit = 100 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + vector_field = schema_payload.get("vectorField") + # search data + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field]) + payload = { + "collectionName": name, + "vector": vector_to_search, + "outputFields": output_fields, + "filter": filter_expr, + "limit": limit, + "offset": 0, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + assert len(res) <= limit + for item in res: + uid = item.get("uid") + eval(filter_expr) + + @pytest.mark.parametrize("filter_expr", ["name > \"placeholder\"", "name like \"placeholder%\""]) + def test_search_vector_with_complex_varchar_filter(self, filter_expr): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + limit = 100 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + names = [] + for item in data: + names.append(item.get("name")) + names.sort() + logger.info(f"names: {names}") + mid = len(names) // 2 + prefix = names[mid][0:2] + vector_field = schema_payload.get("vectorField") + # search data + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field]) + filter_expr = filter_expr.replace("placeholder", prefix) + logger.info(f"filter_expr: {filter_expr}") + payload = { + "collectionName": name, + "vector": vector_to_search, + "outputFields": output_fields, + "filter": filter_expr, + "limit": limit, + "offset": 0, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + assert len(res) <= limit + for item in res: + name = item.get("name") + logger.info(f"name: {name}") + if ">" in filter_expr: + assert name > prefix + if "like" in filter_expr: + assert name.startswith(prefix) + + @pytest.mark.parametrize("filter_expr", ["uid < 100 and name > \"placeholder\"", + "uid < 100 and name like \"placeholder%\"" + ]) + def test_search_vector_with_complex_int64_varchar_and_filter(self, filter_expr): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + limit = 100 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + names = [] + for item in data: + names.append(item.get("name")) + names.sort() + logger.info(f"names: {names}") + mid = len(names) // 2 + prefix = names[mid][0:2] + vector_field = schema_payload.get("vectorField") + # search data + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field]) + filter_expr = filter_expr.replace("placeholder", prefix) + logger.info(f"filter_expr: {filter_expr}") + payload = { + "collectionName": name, + "vector": vector_to_search, + "outputFields": output_fields, + "filter": filter_expr, + "limit": limit, + "offset": 0, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + assert len(res) <= limit + for item in res: + uid = item.get("uid") + name = 
item.get("name") + logger.info(f"name: {name}") + uid_expr = filter_expr.split("and")[0] + assert eval(uid_expr) is True + varchar_expr = filter_expr.split("and")[1] + if ">" in varchar_expr: + assert name > prefix + if "like" in varchar_expr: + assert name.startswith(prefix) + + +@pytest.mark.L1 +class TestSearchVectorNegative(TestBase): + @pytest.mark.parametrize("limit", [0, 16385]) + def test_search_vector_with_invalid_limit(self, limit): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + dim = 128 + schema_payload, data = self.init_collection(name, dim=dim) + vector_field = schema_payload.get("vectorField") + # search data + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field]) + payload = { + "collectionName": name, + "vector": vector_to_search, + "outputFields": output_fields, + "filter": "uid >= 0", + "limit": limit, + "offset": 0, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 1 + + @pytest.mark.parametrize("offset", [-1, 100_001]) + def test_search_vector_with_invalid_offset(self, offset): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + dim = 128 + schema_payload, data = self.init_collection(name, dim=dim) + vector_field = schema_payload.get("vectorField") + # search data + dim = 128 + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + output_fields = get_common_fields_by_data(data, exclude_fields=[vector_field]) + payload = { + "collectionName": name, + "vector": vector_to_search, + "outputFields": output_fields, + "filter": "uid >= 0", + "limit": 100, + "offset": offset, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 1 + + +@pytest.mark.L0 +class TestQueryVector(TestBase): + + @pytest.mark.parametrize("expr", ["10+20 <= uid < 20+30", "uid in [1,2,3,4]", + "uid > 0", "uid >= 0", "uid > 0", + "uid > -100 and uid < 100"]) + @pytest.mark.parametrize("include_output_fields", [True, False]) + @pytest.mark.parametrize("partial_fields", [True, False]) + def test_query_vector_with_int64_filter(self, expr, include_output_fields, partial_fields): + """ + Query a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + schema_payload, data = self.init_collection(name) + output_fields = get_common_fields_by_data(data) + if partial_fields: + output_fields = output_fields[:len(output_fields) // 2] + if "uid" not in output_fields: + output_fields.append("uid") + else: + output_fields = output_fields + + # query data + payload = { + "collectionName": name, + "filter": expr, + "limit": 100, + "offset": 0, + "outputFields": output_fields + } + if not include_output_fields: + payload.pop("outputFields") + if 'vector' in output_fields: + output_fields.remove("vector") + time.sleep(5) + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + for r in res: + uid = r['uid'] + assert eval(expr) is True + for field in output_fields: + assert field in r + + @pytest.mark.parametrize("filter_expr", ["name > \"placeholder\"", "name like \"placeholder%\""]) + @pytest.mark.parametrize("include_output_fields", [True, False]) + def test_query_vector_with_varchar_filter(self, filter_expr, include_output_fields): + """ + Query a vector with a complex 
payload + """ + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + limit = 100 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + names = [] + for item in data: + names.append(item.get("name")) + names.sort() + logger.info(f"names: {names}") + mid = len(names) // 2 + prefix = names[mid][0:2] + # search data + output_fields = get_common_fields_by_data(data) + filter_expr = filter_expr.replace("placeholder", prefix) + logger.info(f"filter_expr: {filter_expr}") + payload = { + "collectionName": name, + "outputFields": output_fields, + "filter": filter_expr, + "limit": limit, + "offset": 0, + } + if not include_output_fields: + payload.pop("outputFields") + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + assert len(res) <= limit + for item in res: + name = item.get("name") + logger.info(f"name: {name}") + if ">" in filter_expr: + assert name > prefix + if "like" in filter_expr: + assert name.startswith(prefix) + + @pytest.mark.parametrize("sum_of_limit_offset", [16384]) + def test_query_vector_with_large_sum_of_limit_offset(self, sum_of_limit_offset): + """ + Query a vector with sum of limit and offset larger than max value + """ + max_sum_of_limit_offset = 16384 + name = gen_collection_name() + filter_expr = "name > \"placeholder\"" + self.name = name + nb = 200 + dim = 128 + limit = 100 + offset = sum_of_limit_offset - limit + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + names = [] + for item in data: + names.append(item.get("name")) + names.sort() + logger.info(f"names: {names}") + mid = len(names) // 2 + prefix = names[mid][0:2] + # search data + output_fields = get_common_fields_by_data(data) + filter_expr = filter_expr.replace("placeholder", prefix) + logger.info(f"filter_expr: {filter_expr}") + payload = { + "collectionName": name, + "outputFields": output_fields, + "filter": filter_expr, + "limit": limit, + "offset": offset, + } + rsp = self.vector_client.vector_query(payload) + if sum_of_limit_offset > max_sum_of_limit_offset: + assert rsp['code'] == 1 + return + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + assert len(res) <= limit + for item in res: + name = item.get("name") + logger.info(f"name: {name}") + if ">" in filter_expr: + assert name > prefix + if "like" in filter_expr: + assert name.startswith(prefix) + + +@pytest.mark.L0 +class TestGetVector(TestBase): + + def test_get_vector_with_simple_payload(self): + """ + Search a vector with a simple payload + """ + name = gen_collection_name() + self.name = name + self.init_collection(name) + + # search data + dim = 128 + vector_to_search = preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + payload = { + "collectionName": name, + "vector": vector_to_search, + } + rsp = self.vector_client.vector_search(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + limit = int(payload.get("limit", 100)) + assert len(res) == limit + ids = [item['id'] for item in res] + assert len(ids) == len(set(ids)) + payload = { + "collectionName": name, + "outputFields": ["*"], + "id": ids[0], + } + rsp = self.vector_client.vector_get(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {res}") + logger.info(f"res: {len(res)}") + for item in res: + assert item['id'] == ids[0] + + @pytest.mark.L0 + @pytest.mark.parametrize("id_field_type", ["list", "one"]) + 
@pytest.mark.parametrize("include_invalid_id", [True, False]) + @pytest.mark.parametrize("include_output_fields", [True, False]) + def test_get_vector_complex(self, id_field_type, include_output_fields, include_invalid_id): + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + output_fields = get_common_fields_by_data(data) + uids = [] + for item in data: + uids.append(item.get("uid")) + payload = { + "collectionName": name, + "outputFields": output_fields, + "filter": f"uid in {uids}", + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + ids = [] + for r in res: + ids.append(r['id']) + logger.info(f"ids: {len(ids)}") + id_to_get = None + if id_field_type == "list": + id_to_get = ids + if id_field_type == "one": + id_to_get = ids[0] + if include_invalid_id: + if isinstance(id_to_get, list): + id_to_get[-1] = 0 + else: + id_to_get = 0 + # get by id list + payload = { + "collectionName": name, + "outputFields": output_fields, + "id": id_to_get + } + rsp = self.vector_client.vector_get(payload) + assert rsp['code'] == 200 + res = rsp['data'] + if isinstance(id_to_get, list): + if include_invalid_id: + assert len(res) == len(id_to_get) - 1 + else: + assert len(res) == len(id_to_get) + else: + if include_invalid_id: + assert len(res) == 0 + else: + assert len(res) == 1 + for r in rsp['data']: + if isinstance(id_to_get, list): + assert r['id'] in id_to_get + else: + assert r['id'] == id_to_get + if include_output_fields: + for field in output_fields: + assert field in r + + +@pytest.mark.L0 +class TestDeleteVector(TestBase): + + @pytest.mark.parametrize("include_invalid_id", [True, False]) + @pytest.mark.parametrize("id_field_type", ["list", "one"]) + def test_delete_vector_default(self, id_field_type, include_invalid_id): + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + time.sleep(1) + output_fields = get_common_fields_by_data(data) + uids = [] + for item in data: + uids.append(item.get("uid")) + payload = { + "collectionName": name, + "outputFields": output_fields, + "filter": f"uid in {uids}", + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + ids = [] + for r in res: + ids.append(r['id']) + logger.info(f"ids: {len(ids)}") + id_to_get = None + if id_field_type == "list": + id_to_get = ids + if id_field_type == "one": + id_to_get = ids[0] + if include_invalid_id: + if isinstance(id_to_get, list): + id_to_get.append(0) + else: + id_to_get = 0 + if isinstance(id_to_get, list): + if len(id_to_get) >= 100: + id_to_get = id_to_get[-100:] + # delete by id list + payload = { + "collectionName": name, + "id": id_to_get + } + rsp = self.vector_client.vector_delete(payload) + assert rsp['code'] == 200 + logger.info(f"delete res: {rsp}") + + # verify data deleted + if not isinstance(id_to_get, list): + id_to_get = [id_to_get] + payload = { + "collectionName": name, + "filter": f"id in {id_to_get}", + } + time.sleep(5) + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + assert len(rsp['data']) == 0 + + def test_delete_vector_by_filter(self): + dim = 128 + nb = 3000 + insert_round = 1 + + name = gen_collection_name() + payload = { + "collectionName": name, + "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", 
"isPrimary": True, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}}, + {"fieldName": "text_emb", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}} + ] + }, + "indexParams": [{"fieldName": "text_emb", "indexName": "text_emb_index", "metricType": "L2"}] + } + rsp = self.collection_client.collection_create(payload) + assert rsp['code'] == 200 + rsp = self.collection_client.collection_describe(name) + logger.info(f"rsp: {rsp}") + assert rsp['code'] == 200 + # insert data + for i in range(insert_round): + data = [] + for j in range(nb): + tmp = { + "book_id": i * nb + j, + "word_count": i * nb + j, + "book_describe": f"book_{i * nb + j}", + "text_emb": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + } + data.append(tmp) + payload = { + "collectionName": name, + "data": data, + } + body_size = sys.getsizeof(json.dumps(payload)) + logger.info(f"body size: {body_size / 1024 / 1024} MB") + rsp = self.vector_client.vector_insert(payload) + assert rsp['code'] == 200 + assert rsp['data']['insertCount'] == nb + # query data before delete + c = Collection(name) + res = c.query(expr="", output_fields=["count(*)"]) + logger.info(f"res: {res}") + + # delete data + payload = { + "collectionName": name, + "filter": "word_count >= 0", + } + rsp = self.vector_client.vector_delete(payload) + + # query data after delete + time.sleep(1) + res = c.query(expr="", output_fields=["count(*)"]) + logger.info(f"res: {res}") + assert res[0]["count(*)"] == 0 + + +@pytest.mark.L1 +class TestDeleteVectorNegative(TestBase): + def test_delete_vector_with_invalid_api_key(self): + """ + Delete a vector with an invalid api key + """ + name = gen_collection_name() + self.name = name + nb = 200 + dim = 128 + schema_payload, data = self.init_collection(name, dim=dim, nb=nb) + output_fields = get_common_fields_by_data(data) + uids = [] + for item in data: + uids.append(item.get("uid")) + payload = { + "collectionName": name, + "outputFields": output_fields, + "filter": f"uid in {uids}", + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + ids = [] + for r in res: + ids.append(r['id']) + logger.info(f"ids: {len(ids)}") + id_to_get = ids + # delete by id list + payload = { + "collectionName": name, + "id": id_to_get + } + client = self.vector_client + client.api_key = "invalid_api_key" + rsp = client.vector_delete(payload) + assert rsp['code'] == 1800 + + def test_delete_vector_with_invalid_collection_name(self): + """ + Delete a vector with an invalid collection name + """ + name = gen_collection_name() + self.name = name + self.init_collection(name, dim=128, nb=3000) + + # query data + # expr = f"id in {[i for i in range(10)]}".replace("[", "(").replace("]", ")") + expr = "id > 0" + payload = { + "collectionName": name, + "filter": expr, + "limit": 3000, + "offset": 0, + "outputFields": ["id", "uid"] + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + id_list = [r['id'] for r in res] + delete_expr = f"id in {[i for i in id_list[:10]]}" + # query data before delete + payload = { + "collectionName": name, + "filter": delete_expr, + "limit": 3000, + "offset": 0, + "outputFields": ["id", "uid"] + } + rsp = self.vector_client.vector_query(payload) + assert 
rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + # delete data + payload = { + "collectionName": name + "_invalid", + "filter": delete_expr, + } + rsp = self.vector_client.vector_delete(payload) + assert rsp['code'] == 1 + + def test_delete_vector_with_non_primary_key(self): + """ + Delete a vector with a non-primary key, expect no data were deleted + """ + name = gen_collection_name() + self.name = name + self.init_collection(name, dim=128, nb=300) + expr = "uid > 0" + payload = { + "collectionName": name, + "filter": expr, + "limit": 3000, + "offset": 0, + "outputFields": ["id", "uid"] + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + logger.info(f"res: {len(res)}") + id_list = [r['uid'] for r in res] + delete_expr = f"uid in {[i for i in id_list[:10]]}" + # query data before delete + payload = { + "collectionName": name, + "filter": delete_expr, + "limit": 3000, + "offset": 0, + "outputFields": ["id", "uid"] + } + rsp = self.vector_client.vector_query(payload) + assert rsp['code'] == 200 + res = rsp['data'] + num_before_delete = len(res) + logger.info(f"res: {len(res)}") + # delete data + payload = { + "collectionName": name, + "filter": delete_expr, + } + rsp = self.vector_client.vector_delete(payload) + # query data after delete + payload = { + "collectionName": name, + "filter": delete_expr, + "limit": 3000, + "offset": 0, + "outputFields": ["id", "uid"] + } + time.sleep(1) + rsp = self.vector_client.vector_query(payload) + assert len(rsp["data"]) == num_before_delete diff --git a/tests/restful_client_v2/utils/constant.py b/tests/restful_client_v2/utils/constant.py new file mode 100644 index 0000000000..adeb3c8b2c --- /dev/null +++ b/tests/restful_client_v2/utils/constant.py @@ -0,0 +1,2 @@ + +MAX_SUM_OFFSET_AND_LIMIT = 16384 diff --git a/tests/restful_client_v2/utils/util_log.py b/tests/restful_client_v2/utils/util_log.py new file mode 100644 index 0000000000..e2e9b5c5ac --- /dev/null +++ b/tests/restful_client_v2/utils/util_log.py @@ -0,0 +1,56 @@ +import logging +import sys + +from config.log_config import log_config + + +class TestLog: + def __init__(self, logger, log_debug, log_file, log_err, log_worker): + self.logger = logger + self.log_debug = log_debug + self.log_file = log_file + self.log_err = log_err + self.log_worker = log_worker + + self.log = logging.getLogger(self.logger) + self.log.setLevel(logging.DEBUG) + + try: + formatter = logging.Formatter("[%(asctime)s - %(levelname)s - %(name)s]: " + "%(message)s (%(filename)s:%(lineno)s)") + # [%(process)s] process NO. + dh = logging.FileHandler(self.log_debug) + dh.setLevel(logging.DEBUG) + dh.setFormatter(formatter) + self.log.addHandler(dh) + + fh = logging.FileHandler(self.log_file) + fh.setLevel(logging.INFO) + fh.setFormatter(formatter) + self.log.addHandler(fh) + + eh = logging.FileHandler(self.log_err) + eh.setLevel(logging.ERROR) + eh.setFormatter(formatter) + self.log.addHandler(eh) + + if self.log_worker != "": + wh = logging.FileHandler(self.log_worker) + wh.setLevel(logging.DEBUG) + wh.setFormatter(formatter) + self.log.addHandler(wh) + + ch = logging.StreamHandler(sys.stdout) + ch.setLevel(logging.DEBUG) + ch.setFormatter(formatter) + + except Exception as e: + print("Can not use %s or %s or %s to log. 
error : %s" % (log_debug, log_file, log_err, str(e))) + + +"""All modules share this unified log""" +log_debug = log_config.log_debug +log_info = log_config.log_info +log_err = log_config.log_err +log_worker = log_config.log_worker +test_log = TestLog('ci_test', log_debug, log_info, log_err, log_worker).log diff --git a/tests/restful_client_v2/utils/utils.py b/tests/restful_client_v2/utils/utils.py new file mode 100644 index 0000000000..013f72e08d --- /dev/null +++ b/tests/restful_client_v2/utils/utils.py @@ -0,0 +1,228 @@ +import random +import time +import random +import string +from faker import Faker +import numpy as np +import jax.numpy as jnp +from sklearn import preprocessing +import base64 +import requests +from loguru import logger +import datetime + +fake = Faker() + + +def random_string(length=8): + letters = string.ascii_letters + return ''.join(random.choice(letters) for _ in range(length)) + + +def gen_collection_name(prefix="test_collection", length=8): + name = f'{prefix}_' + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + random_string(length=length) + return name + +def admin_password(): + return "Milvus" + + +def gen_unique_str(prefix="test", length=8): + return prefix + "_" + random_string(length=length) + + +def invalid_cluster_name(): + res = [ + "demo" * 100, + "demo" + "!", + "demo" + "@", + ] + return res + + +def wait_cluster_be_ready(cluster_id, client, timeout=120): + t0 = time.time() + while True and time.time() - t0 < timeout: + rsp = client.cluster_describe(cluster_id) + if rsp['code'] == 200: + if rsp['data']['status'] == "RUNNING": + return time.time() - t0 + time.sleep(1) + logger.debug("wait cluster to be ready, cost time: %s" % (time.time() - t0)) + return -1 + + + + + +def gen_data_by_type(field): + data_type = field["type"] + if data_type == "bool": + return random.choice([True, False]) + if data_type == "int8": + return random.randint(-128, 127) + if data_type == "int16": + return random.randint(-32768, 32767) + if data_type == "int32": + return random.randint(-2147483648, 2147483647) + if data_type == "int64": + return random.randint(-9223372036854775808, 9223372036854775807) + if data_type == "float32": + return np.float64(random.random()) # Object of type float32 is not JSON serializable, so set it as float64 + if data_type == "float64": + return np.float64(random.random()) + if "varchar" in data_type: + length = int(data_type.split("(")[1].split(")")[0]) + return "".join([chr(random.randint(97, 122)) for _ in range(length)]) + if "floatVector" in data_type: + dim = int(data_type.split("(")[1].split(")")[0]) + return preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + return None + + +def get_data_by_fields(fields, nb): + # logger.info(f"fields: {fields}") + fields_not_auto_id = [] + for field in fields: + if not field.get("autoId", False): + fields_not_auto_id.append(field) + # logger.info(f"fields_not_auto_id: {fields_not_auto_id}") + data = [] + for i in range(nb): + tmp = {} + for field in fields_not_auto_id: + tmp[field["name"]] = gen_data_by_type(field) + data.append(tmp) + return data + + +def get_random_json_data(uid=None): + # gen random dict data + if uid is None: + uid = 0 + data = {"uid": uid, "name": fake.name(), "address": fake.address(), "text": fake.text(), "email": fake.email(), + "phone_number": fake.phone_number(), + "json": { + "name": fake.name(), + "address": fake.address() + } + } + for i in range(random.randint(1, 10)): + data["key" + str(random.randint(1, 100_000))] = "value" 
+ str(random.randint(1, 100_000)) + return data + + + def get_data_by_payload(payload, nb=100): + dim = payload.get("dimension", 128) + vector_field = payload.get("vectorField", "vector") + data = [] + if nb == 1: + data = [{ + vector_field: preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist(), + **get_random_json_data() + + }] + else: + for i in range(nb): + data.append({ + vector_field: preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist(), + **get_random_json_data(uid=i) + }) + return data + + + def get_common_fields_by_data(data, exclude_fields=None): + fields = set() + if isinstance(data, dict): + data = [data] + if not isinstance(data, list): + raise Exception("data must be list or dict") + common_fields = set(data[0].keys()) + for d in data: + keys = set(d.keys()) + common_fields = common_fields.intersection(keys) + if exclude_fields is not None: + exclude_fields = set(exclude_fields) + common_fields = common_fields.difference(exclude_fields) + return list(common_fields) + + + def gen_binary_vectors(num, dim): + raw_vectors = [] + binary_vectors = [] + for _ in range(num): + raw_vector = [random.randint(0, 1) for _ in range(dim)] + raw_vectors.append(raw_vector) + # pack the binary-valued array into bits in a uint8 array, then convert to bytes + binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist())) + return raw_vectors, binary_vectors + + + def gen_fp16_vectors(num, dim): + """ + generate float16 vector data + raw_vectors : the vectors + fp16_vectors: the bytes used for insert + return: raw_vectors and fp16_vectors + """ + raw_vectors = [] + fp16_vectors = [] + for _ in range(num): + raw_vector = [random.random() for _ in range(dim)] + raw_vectors.append(raw_vector) + fp16_vector = np.array(raw_vector, dtype=np.float16).view(np.uint8).tolist() + fp16_vectors.append(bytes(fp16_vector)) + + return raw_vectors, fp16_vectors + + + def gen_bf16_vectors(num, dim): + """ + generate brain float16 vector data + raw_vectors : the vectors + bf16_vectors: the bytes used for insert + return: raw_vectors and bf16_vectors + """ + raw_vectors = [] + bf16_vectors = [] + for _ in range(num): + raw_vector = [random.random() for _ in range(dim)] + raw_vectors.append(raw_vector) + bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).view(np.uint8).tolist() + bf16_vectors.append(bytes(bf16_vector)) + + return raw_vectors, bf16_vectors + + + def gen_vector(datatype="FloatVector", dim=128, binary_data=False): + value = None + if datatype == "FloatVector": + return preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist() + if datatype == "BinaryVector": + value = gen_binary_vectors(1, dim)[1][0] + if datatype == "Float16Vector": + value = gen_fp16_vectors(1, dim)[1][0] + if datatype == "BFloat16Vector": + value = gen_bf16_vectors(1, dim)[1][0] + if value is None: + raise Exception(f"unsupported datatype: {datatype}") + else: + if binary_data: + return value + else: + data = base64.b64encode(value).decode("utf-8") + return data + + + def get_all_fields_by_data(data, exclude_fields=None): + fields = set() + for d in data: + keys = list(d.keys()) + fields = fields.union(keys) + if exclude_fields is not None: + exclude_fields = set(exclude_fields) + fields = fields.difference(exclude_fields) + return list(fields) + + +
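Because JSON has no raw-bytes type, `gen_vector` base64-encodes the packed float16/bfloat16/binary payloads before they go into a request body. The receiving side, or a test that wants to sanity-check the data, can reverse the encoding; below is a numpy-only round trip for the float16 case (bfloat16 would additionally need jax or ml_dtypes to reinterpret the bytes). The helper names are illustrative, not part of the test suite.

```python
import base64
import numpy as np

def encode_fp16(values: list[float]) -> str:
    """Pack floats as float16 bytes and base64-encode them, mirroring gen_vector's output."""
    return base64.b64encode(np.array(values, dtype=np.float16).tobytes()).decode("utf-8")

def decode_fp16(encoded: str) -> list[float]:
    """Reverse the encoding back to Python floats."""
    return np.frombuffer(base64.b64decode(encoded), dtype=np.float16).astype(np.float64).tolist()

original = [0.1, 0.25, 0.5, 1.0]
restored = decode_fp16(encode_fp16(original))
assert np.allclose(original, restored, atol=1e-3)  # float16 keeps roughly 3 decimal digits
print(restored)
```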