From 048d8a11142c8dfb0bfa1b8ec9cd52ad2ead0f76 Mon Sep 17 00:00:00 2001 From: binbin <83755740+binbinlv@users.noreply.github.com> Date: Tue, 19 Jul 2022 08:58:28 +0800 Subject: [PATCH] [skip e2e] Update test cases for binary metrics (#18297) Signed-off-by: Binbin Lv --- tests/python_client/testcases/test_search.py | 60 +++++++++++--------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 7f1a525727..8abf10dd84 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -1626,29 +1626,32 @@ class TestCollectionSearch(TestcaseBase): assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.xfail(reason="issue 18283") @pytest.mark.parametrize("index", ["BIN_FLAT"]) - def test_search_binary_substructure_flat_index(self, nq, dim, auto_id, _async, index, is_flush): + def test_search_binary_substructure_flat_index(self, auto_id, _async, index, is_flush): """ target: search binary_collection, and check the result: distance - method: compare the return distance value with value computed with SUBSTRUCTURE + method: compare the return distance value with value computed with SUBSTRUCTURE. + (1) The returned limit(topK) are impacted by dimension (dim) of data. + (2) Searched topK is smaller than set limit when dim is large + (3) it does not support "BIN_IVF_FLAT" index expected: the return distance equals to the computed value """ # 1. initialize with binary data - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=True, - is_flush=is_flush)[0:5] + nq = 1 + dim = 8 + collection_w, _, binary_raw_vector, insert_ids, time_stamp \ + = self.init_collection_general(prefix, True, default_nb, is_binary=True, auto_id=auto_id, + dim=dim, is_index=True, is_flush=is_flush)[0:5] # 2. create index default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "SUBSTRUCTURE"} collection_w.create_index("binary_vector", default_index) collection_w.load() # 3. compute the distance - query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim) - distance_0 = cf.substructure(query_raw_vector[0], binary_raw_vector[0]) - distance_1 = cf.substructure(query_raw_vector[0], binary_raw_vector[1]) + query_raw_vector, binary_vectors = cf.gen_binary_vectors(nq, dim) + distance_min = 1 + for binary_raw in binary_raw_vector: + distance = cf.substructure(query_raw_vector[0], binary_raw) + distance_min = min(distance, distance_min) # 4. search and compare the distance search_params = {"metric_type": "SUBSTRUCTURE", "params": {"nprobe": 10}} res = collection_w.search(binary_vectors[:nq], "binary_vector", @@ -1658,37 +1661,40 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": 2, + "limit": default_limit, "_async": _async})[0] if _async: res.done() res = res.result() - assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon + assert abs(res[0].distances[0] - distance_min) <= epsilon @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.xfail(reason="issue 18283") @pytest.mark.parametrize("index", ["BIN_FLAT"]) - def test_search_binary_superstructure_flat_index(self, nq, dim, auto_id, _async, index, is_flush): + def test_search_binary_superstructure_flat_index(self, auto_id, _async, index, is_flush): """ target: search binary_collection, and check the result: distance method: compare the return distance value with value computed with SUPERSTRUCTURE + (1) The returned limit(topK) are impacted by dimension (dim) of data. + (2) Searched topK is smaller than set limit when dim is large + (3) it does not support "BIN_IVF_FLAT" index expected: the return distance equals to the computed value """ # 1. initialize with binary data - collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2, - is_binary=True, - auto_id=auto_id, - dim=dim, - is_index=True, - is_flush=is_flush)[0:5] + nq = 1 + dim = 8 + collection_w, _, binary_raw_vector, insert_ids, time_stamp \ + = self.init_collection_general(prefix, True, default_nb, is_binary=True, auto_id=auto_id, + dim=dim, is_index=True, is_flush=is_flush)[0:5] # 2. create index default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "SUPERSTRUCTURE"} collection_w.create_index("binary_vector", default_index) collection_w.load() # 3. compute the distance - query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim) - distance_0 = cf.superstructure(query_raw_vector[0], binary_raw_vector[0]) - distance_1 = cf.superstructure(query_raw_vector[0], binary_raw_vector[1]) + query_raw_vector, binary_vectors = cf.gen_binary_vectors(nq, dim) + distance_min = 1 + for binary_raw in binary_raw_vector: + distance = cf.superstructure(query_raw_vector[0], binary_raw) + distance_min = min(distance, distance_min) # 4. search and compare the distance search_params = {"metric_type": "SUPERSTRUCTURE", "params": {"nprobe": 10}} res = collection_w.search(binary_vectors[:nq], "binary_vector", @@ -1698,12 +1704,12 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": 2, + "limit": default_limit, "_async": _async})[0] if _async: res.done() res = res.result() - assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon + assert abs(res[0].distances[0] - distance_min) <= epsilon @pytest.mark.tags(CaseLabel.L2) def test_search_binary_without_flush(self, metrics, auto_id):