From 3e4c9ae632b031ca0f0bf45194a03e134254ccdf Mon Sep 17 00:00:00 2001 From: del-zhenwu <56623710+del-zhenwu@users.noreply.github.com> Date: Sat, 26 Sep 2020 10:50:51 +0800 Subject: [PATCH] add hnsw pq/sq case (#3863) * add hnsw pq/sq case Signed-off-by: zw * change some cases from l2 to l1 Signed-off-by: zw * disable hnsw_pq/sq in 0.11.0 Signed-off-by: zw * fix groovy Signed-off-by: zw Co-authored-by: zw --- .../modules/DevTest/SingleNodeDevTest.groovy | 7 ++++--- .../collection/test_collection_stats.py | 2 ++ .../collection/test_create_collection.py | 1 + .../entity/test_list_id_in_segment.py | 1 + tests/milvus_python_test/test_index.py | 3 ++- tests/milvus_python_test/test_partition.py | 5 +++-- tests/milvus_python_test/utils.py | 18 +++++++++++++++--- 7 files changed, 28 insertions(+), 9 deletions(-) diff --git a/.jenkins/modules/DevTest/SingleNodeDevTest.groovy b/.jenkins/modules/DevTest/SingleNodeDevTest.groovy index ca5bb9e17d..9b7821771b 100644 --- a/.jenkins/modules/DevTest/SingleNodeDevTest.groovy +++ b/.jenkins/modules/DevTest/SingleNodeDevTest.groovy @@ -11,7 +11,7 @@ timeout(time: 150, unit: 'MINUTES') { retry(3) { try { dir ('charts/milvus') { - writeFile file: 'test.yaml', text: "extraConfiguration:\n engine:\n build_index_threshold: 1000" + writeFile file: 'test.yaml', text: "extraConfiguration:\n engine:\n build_index_threshold: 1000\n max_partition_num: 256" sh "helm install --wait --timeout 300s --set image.repository=registry.zilliz.com/milvus/engine --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP -f ci/db_backend/mysql_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml -f test.yaml --set image.resources.limits.cpu=4.0 --namespace milvus ${env.HELM_RELEASE_NAME} ." } } catch (exc) { @@ -44,7 +44,8 @@ timeout(time: 150, unit: 'MINUTES') { retry(3) { try { dir ("milvus-helm/charts/milvus") { - sh "helm install --wait --timeout 300s --set image.repository=registry.zilliz.com/milvus/engine --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP --set image.resources.requests.memory=8Gi --set image.resources.requests.cpu=2.0 --set image.resources.limits.memory=12Gi --set image.resources.limits.cpu=4.0 -f ci/db_backend/sqlite_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ${env.HELM_RELEASE_NAME} ." + writeFile file: 'test.yaml', text: "extraConfiguration:\n engine:\n build_index_threshold: 1000\n max_partition_num: 256" + sh "helm install --wait --timeout 300s --set image.repository=registry.zilliz.com/milvus/engine --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP --set image.resources.requests.memory=8Gi --set image.resources.requests.cpu=2.0 --set image.resources.limits.memory=12Gi --set image.resources.limits.cpu=4.0 -f ci/db_backend/sqlite_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml -f test.yaml --namespace milvus ${env.HELM_RELEASE_NAME} ." } } catch (exc) { def helmStatusCMD = "helm get manifest --namespace milvus ${env.HELM_RELEASE_NAME} | kubectl describe -n milvus -f - && \ @@ -57,7 +58,7 @@ timeout(time: 150, unit: 'MINUTES') { } dir ("tests/milvus_python_test") { sh "pytest . --level=2 --alluredir=\"test_out/dev/single/sqlite\" --ip ${env.HELM_RELEASE_NAME}.milvus.svc.cluster.local >> ${WORKSPACE}/${env.DEV_TEST_ARTIFACTS}/milvus_${BINARY_VERSION}_sqlite_dev_test.log" - sh "pytest . --level=1 --ip ${env.HELM_RELEASE_NAME}.milvus.svc.cluster.local --port=19121 --handler=HTTP >> ${WORKSPACE}/${env.DEV_TEST_ARTIFACTS}/milvus_${BINARY_VERSION}_sqlite_http_dev_test.log" + // sh "pytest . --level=1 --ip ${env.HELM_RELEASE_NAME}.milvus.svc.cluster.local --port=19121 --handler=HTTP >> ${WORKSPACE}/${env.DEV_TEST_ARTIFACTS}/milvus_${BINARY_VERSION}_sqlite_http_dev_test.log" } } } diff --git a/tests/milvus_python_test/collection/test_collection_stats.py b/tests/milvus_python_test/collection/test_collection_stats.py index f9866a254e..5da43f7684 100644 --- a/tests/milvus_python_test/collection/test_collection_stats.py +++ b/tests/milvus_python_test/collection/test_collection_stats.py @@ -56,6 +56,7 @@ class TestStatsBase: with pytest.raises(Exception) as e: stats = connect.get_collection_stats(collection_name) + @pytest.mark.level(2) def test_get_collection_stats_name_invalid(self, connect, get_collection_name): ''' target: get collection stats where collection name is invalid @@ -217,6 +218,7 @@ class TestStatsBase: connect.flush([collection]) connect.create_index(collection, default_float_vec_field_name, get_simple_index) stats = connect.get_collection_stats(collection) + logging.getLogger().info(stats) assert stats["row_count"] == default_nb for file in stats["partitions"][0]["segments"][0]["files"]: if file["field"] == default_float_vec_field_name and file["name"] != "_raw": diff --git a/tests/milvus_python_test/collection/test_create_collection.py b/tests/milvus_python_test/collection/test_create_collection.py index 1ddd38d7d9..d40e0569c7 100644 --- a/tests/milvus_python_test/collection/test_create_collection.py +++ b/tests/milvus_python_test/collection/test_create_collection.py @@ -306,6 +306,7 @@ class TestCreateCollectionInvalid(object): connect.create_collection(collection_name, fields) # TODO: assert exception + @pytest.mark.level(2) def test_create_collection_invalid_field_name(self, connect, get_invalid_string): collection_name = gen_unique_str(uid) fields = copy.deepcopy(default_fields) diff --git a/tests/milvus_python_test/entity/test_list_id_in_segment.py b/tests/milvus_python_test/entity/test_list_id_in_segment.py index 647957bf15..f08250fd5a 100644 --- a/tests/milvus_python_test/entity/test_list_id_in_segment.py +++ b/tests/milvus_python_test/entity/test_list_id_in_segment.py @@ -62,6 +62,7 @@ class TestListIdInSegmentBase: def get_collection_name(self, request): yield request.param + @pytest.mark.level(2) def test_list_id_in_segment_collection_name_invalid(self, connect, collection, get_collection_name): ''' target: get vector ids where collection name is invalid diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py index 5d7962a486..9ec34f3b56 100644 --- a/tests/milvus_python_test/test_index.py +++ b/tests/milvus_python_test/test_index.py @@ -726,7 +726,7 @@ class TestIndexInvalid(object): def get_index(self, request): yield request.param - @pytest.mark.level(1) + @pytest.mark.level(2) def test_create_index_with_invalid_index_params(self, connect, collection, get_index): logging.getLogger().info(get_index) with pytest.raises(Exception) as e: @@ -793,6 +793,7 @@ class TestIndexAsync: logging.getLogger().info("DROP") connect.drop_collection(collection) + @pytest.mark.level(2) def test_create_index_with_invalid_collectionname(self, connect): collection_name = " " future = connect.create_index(collection_name, field_name, default_index, _async=True) diff --git a/tests/milvus_python_test/test_partition.py b/tests/milvus_python_test/test_partition.py index 4cd401cf95..ec9a2420bb 100644 --- a/tests/milvus_python_test/test_partition.py +++ b/tests/milvus_python_test/test_partition.py @@ -26,7 +26,7 @@ class TestCreateBase: # TODO: enable @pytest.mark.level(2) - @pytest.mark.timeout(1200) + @pytest.mark.timeout(600) def test_create_partition_limit(self, connect, collection, args): ''' target: test create partitions, check status returned @@ -39,7 +39,7 @@ class TestCreateBase: pytest.skip("skip in http mode") def create(connect, threads_num): - for i in range(4096 // threads_num): + for i in range(max_partition_num // threads_num): tag_tmp = gen_unique_str() connect.create_partition(collection, tag_tmp) @@ -373,6 +373,7 @@ class TestNameInvalid(object): with pytest.raises(Exception) as e: connect.drop_partition(collection_name, default_tag) + @pytest.mark.level(2) def test_drop_partition_with_invalid_tag_name(self, connect, collection, get_tag_name): ''' target: test drop partition, with invalid tag name, check status returned diff --git a/tests/milvus_python_test/utils.py b/tests/milvus_python_test/utils.py index ed13a7ebf1..7f0d085477 100644 --- a/tests/milvus_python_test/utils.py +++ b/tests/milvus_python_test/utils.py @@ -22,6 +22,7 @@ default_dim = 128 default_nb = 1200 default_top_k = 10 max_top_k = 16384 +max_partition_num = 256 default_segment_row_limit = 1000 default_server_segment_row_limit = 1024 * 512 default_float_vec_field_name = "float_vector" @@ -30,6 +31,7 @@ default_partition_name = "_default" default_tag = "1970_01_01" # TODO: +# TODO: disable RHNSW_SQ/PQ in 0.11.0 all_index_types = [ "FLAT", "IVF_FLAT", @@ -39,6 +41,8 @@ all_index_types = [ "HNSW", # "NSG", "ANNOY", + # "RHNSW_PQ", + # "RHNSW_SQ", "BIN_FLAT", "BIN_IVF_FLAT" ] @@ -52,6 +56,8 @@ default_index_params = [ {"M": 48, "efConstruction": 500}, # {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50}, {"n_trees": 50}, + # {"M": 48, "efConstruction": 500, "PQM": 16}, + # {"M": 48, "efConstruction": 500}, {"nlist": 128}, {"nlist": 128} ] @@ -640,7 +646,7 @@ def gen_invaild_search_params(): for nprobe in gen_invalid_params(): ivf_search_params = {"index_type": index_type, "search_params": {"nprobe": nprobe}} search_params.append(ivf_search_params) - elif index_type == "HNSW": + elif index_type in ["HNSW", "RHNSW_PQ", "RHNSW_SQ"]: for ef in gen_invalid_params(): hnsw_search_param = {"index_type": index_type, "search_params": {"ef": ef}} search_params.append(hnsw_search_param) @@ -668,9 +674,13 @@ def gen_invalid_index(): index_params.append(index_param) for M in gen_invalid_params(): index_param = {"index_type": "HNSW", "params": {"M": M, "efConstruction": 100}} + index_param = {"index_type": "RHNSW_PQ", "params": {"M": M, "efConstruction": 100}} + index_param = {"index_type": "RHNSW_SQ", "params": {"M": M, "efConstruction": 100}} index_params.append(index_param) for efConstruction in gen_invalid_params(): index_param = {"index_type": "HNSW", "params": {"M": 16, "efConstruction": efConstruction}} + index_param = {"index_type": "RHNSW_PQ", "params": {"M": 16, "efConstruction": efConstruction}} + index_param = {"index_type": "RHNSW_SQ", "params": {"M": 16, "efConstruction": efConstruction}} index_params.append(index_param) for search_length in gen_invalid_params(): index_param = {"index_type": "NSG", @@ -689,6 +699,8 @@ def gen_invalid_index(): index_params.append(index_param) index_params.append({"index_type": "IVF_FLAT", "params": {"invalid_key": 1024}}) index_params.append({"index_type": "HNSW", "params": {"invalid_key": 16, "efConstruction": 100}}) + index_params.append({"index_type": "RHNSW_PQ", "params": {"invalid_key": 16, "efConstruction": 100}}) + index_params.append({"index_type": "RHNSW_SQ", "params": {"invalid_key": 16, "efConstruction": 100}}) index_params.append({"index_type": "NSG", "params": {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300, "knng": 100}}) @@ -721,7 +733,7 @@ def gen_index(): for nlist in nlists \ for m in pq_ms] index_params.extend(IVFPQ_params) - elif index_type == "HNSW": + elif index_type in ["HNSW", "RHNSW_SQ", "RHNSW_PQ"]: hnsw_params = [{"index_type": index_type, "index_param": {"M": M, "efConstruction": efConstruction}} \ for M in Ms \ for efConstruction in efConstructions] @@ -764,7 +776,7 @@ def get_search_param(index_type, metric_type="L2"): search_params = {"metric_type": metric_type} if index_type in ivf() or index_type in binary_support(): search_params.update({"nprobe": 64}) - elif index_type == "HNSW": + elif index_type in ["HNSW", "RHNSW_SQ", "RHNSW_PQ"]: search_params.update({"ef": 64}) elif index_type == "NSG": search_params.update({"search_length": 100})