[skip e2e] Update test cases for binary metrics (#18297)

Signed-off-by: Binbin Lv <binbin.lv@zilliz.com>
2026-01-07 19:31:51 +08:00 · 2022-07-19 08:58:28 +08:00 · 2022-07-19 08:58:28 +08:00 · 048d8a1114
commit 048d8a1114
parent 1efb413b26
1 changed file with 33 additions and 27 deletions
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@ -1626,29 +1626,32 @@ class TestCollectionSearch(TestcaseBase):
        assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon

    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.xfail(reason="issue 18283")
    @pytest.mark.parametrize("index", ["BIN_FLAT"])
-    def test_search_binary_substructure_flat_index(self, nq, dim, auto_id, _async, index, is_flush):
+    def test_search_binary_substructure_flat_index(self, auto_id, _async, index, is_flush):
        """
        target: search binary_collection, and check the result: distance
-        method: compare the return distance value with value computed with SUBSTRUCTURE
+        method: compare the returned distance value with the value computed with SUBSTRUCTURE.
+                (1) The returned limit (topK) is impacted by the dimension (dim) of the data.
+                (2) The searched topK is smaller than the set limit when dim is large.
+                (3) It does not support the "BIN_IVF_FLAT" index.
        expected: the return distance equals to the computed value
        """
        # 1. initialize with binary data
-        collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2,
-                                                                                                  is_binary=True,
-                                                                                                  auto_id=auto_id,
-                                                                                                  dim=dim,
-                                                                                                  is_index=True,
-                                                                                                  is_flush=is_flush)[0:5]
+        nq = 1
+        dim = 8
+        collection_w, _, binary_raw_vector, insert_ids, time_stamp \
+            = self.init_collection_general(prefix, True, default_nb, is_binary=True, auto_id=auto_id,
+                                           dim=dim, is_index=True, is_flush=is_flush)[0:5]
        # 2. create index
        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "SUBSTRUCTURE"}
        collection_w.create_index("binary_vector", default_index)
        collection_w.load()
        # 3. compute the distance
-        query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim)
-        distance_0 = cf.substructure(query_raw_vector[0], binary_raw_vector[0])
-        distance_1 = cf.substructure(query_raw_vector[0], binary_raw_vector[1])
+        query_raw_vector, binary_vectors = cf.gen_binary_vectors(nq, dim)
+        distance_min = 1
+        for binary_raw in binary_raw_vector:
+            distance = cf.substructure(query_raw_vector[0], binary_raw)
+            distance_min = min(distance, distance_min)
        # 4. search and compare the distance
        search_params = {"metric_type": "SUBSTRUCTURE", "params": {"nprobe": 10}}
        res = collection_w.search(binary_vectors[:nq], "binary_vector",
@ -1658,37 +1661,40 @@ class TestCollectionSearch(TestcaseBase):
                                  check_task=CheckTasks.check_search_results,
                                  check_items={"nq": nq,
                                               "ids": insert_ids,
-                                               "limit": 2,
+                                               "limit": default_limit,
                                               "_async": _async})[0]
        if _async:
            res.done()
            res = res.result()
-        assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon
+        assert abs(res[0].distances[0] - distance_min) <= epsilon

    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.xfail(reason="issue 18283")
    @pytest.mark.parametrize("index", ["BIN_FLAT"])
-    def test_search_binary_superstructure_flat_index(self, nq, dim, auto_id, _async, index, is_flush):
+    def test_search_binary_superstructure_flat_index(self, auto_id, _async, index, is_flush):
        """
        target: search binary_collection, and check the result: distance
        method: compare the return distance value with value computed with SUPERSTRUCTURE
+                (1) The returned limit (topK) is impacted by the dimension (dim) of the data.
+                (2) The searched topK is smaller than the set limit when dim is large.
+                (3) It does not support the "BIN_IVF_FLAT" index.
        expected: the return distance equals to the computed value
        """
        # 1. initialize with binary data
-        collection_w, _, binary_raw_vector, insert_ids, time_stamp = self.init_collection_general(prefix, True, 2,
-                                                                                                  is_binary=True,
-                                                                                                  auto_id=auto_id,
-                                                                                                  dim=dim,
-                                                                                                  is_index=True,
-                                                                                                  is_flush=is_flush)[0:5]
+        nq = 1
+        dim = 8
+        collection_w, _, binary_raw_vector, insert_ids, time_stamp \
+            = self.init_collection_general(prefix, True, default_nb, is_binary=True, auto_id=auto_id,
+                                           dim=dim, is_index=True, is_flush=is_flush)[0:5]
        # 2. create index
        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "SUPERSTRUCTURE"}
        collection_w.create_index("binary_vector", default_index)
        collection_w.load()
        # 3. compute the distance
-        query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim)
-        distance_0 = cf.superstructure(query_raw_vector[0], binary_raw_vector[0])
-        distance_1 = cf.superstructure(query_raw_vector[0], binary_raw_vector[1])
+        query_raw_vector, binary_vectors = cf.gen_binary_vectors(nq, dim)
+        distance_min = 1
+        for binary_raw in binary_raw_vector:
+            distance = cf.superstructure(query_raw_vector[0], binary_raw)
+            distance_min = min(distance, distance_min)
        # 4. search and compare the distance
        search_params = {"metric_type": "SUPERSTRUCTURE", "params": {"nprobe": 10}}
        res = collection_w.search(binary_vectors[:nq], "binary_vector",
@ -1698,12 +1704,12 @@ class TestCollectionSearch(TestcaseBase):
                                  check_task=CheckTasks.check_search_results,
                                  check_items={"nq": nq,
                                               "ids": insert_ids,
-                                               "limit": 2,
+                                               "limit": default_limit,
                                               "_async": _async})[0]
        if _async:
            res.done()
            res = res.result()
-        assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon
+        assert abs(res[0].distances[0] - distance_min) <= epsilon

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_binary_without_flush(self, metrics, auto_id):