test: add test cases for json modulo operation (#43191)

/kind improvement Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
2026-01-07 19:31:51 +08:00 · 2025-07-10 17:08:48 +08:00 · 2025-07-10 17:08:48 +08:00 · 85c8049296
commit 85c8049296
parent b2597c6329
1 changed files with 127 additions and 1 deletions
--- a/tests/python_client/milvus_client/test_milvus_client_query.py
+++ b/tests/python_client/milvus_client/test_milvus_client_query.py
@ -1,5 +1,4 @@
 import pytest
-
 from base.client_v2_base import TestMilvusClientV2Base
 from utils.util_log import test_log as log
 from common import common_func as cf
@ -333,6 +332,133 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
                 ct.err_msg: "the sample factor should be between 0 and 1 and not too close to 0 or 1"}
        self.query(client, collection_name, filter=expr,
                   check_task=CheckTasks.err_res, check_items=error)
+    
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_query_json_modulo_operator(self):
+        """
+        target: test query with modulo operator on JSON field values
+        method: create collection with JSON field, insert data with numeric values, test modulo filters
+        expected: modulo operations should work correctly and return all matching results
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        # 1. create collection with JSON field
+        schema, _ = self.create_schema(client, enable_dynamic_field=False)
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field("json_field", DataType.JSON, nullable=True)
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params, consistency_level="Strong")
+        
+        # 2. insert 3000 rows with various numeric values
+        nb = 3000
+        rng = np.random.default_rng(seed=19530)
+        
+        # Store original data for verification
+        rows = []
+        all_numbers = []
+        all_nested_numbers = []
+        
+        for i in range(nb):
+            # Generate diverse numeric values
+            if i % 5 == 0:
+                # Some large numbers
+                numeric_value = random.randint(100000, 999999)
+            elif i % 3 == 0:
+                # Some medium numbers
+                numeric_value = random.randint(1000, 9999)
+            else:
+                # Regular sequential numbers
+                numeric_value = i
+            
+            nested_value = i * 7 + (i % 13)  # Different pattern for nested
+            
+            all_numbers.append(numeric_value)
+            all_nested_numbers.append(nested_value)
+            
+            rows.append({
+                default_primary_key_field_name: i,
+                default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                "json_field": {
+                    "number": numeric_value,
+                    "index": i,
+                    "data": {"nested_number": nested_value}
+                }
+            })
+        
+        self.insert(client, collection_name, rows)
+        
+        # 3. Test modulo operations with different modulo values and remainders
+        test_cases = [
+            (10, list(range(10))),  # mod 10 - all remainders 0-9
+            (2, [0, 1]),  # mod 2 - even/odd
+            (3, [0, 1, 2]),  # mod 3
+            (7, list(range(7))),  # mod 7 - all remainders 0-6
+            (13, [0, 5, 12]),  # mod 13 - selected remainders
+        ]
+        
+        for modulo, remainders in test_cases:
+            log.info(f"\nTesting modulo {modulo}")
+            
+            for remainder in remainders:
+                # Calculate expected results from original data
+                expected_ids = [i for i, num in enumerate(all_numbers) if num % modulo == remainder]
+                expected_count = len(expected_ids)
+                
+                # Query and get results
+                filter_expr = f'json_field["number"] % {modulo} == {remainder}'
+                res, _ = self.query(client, collection_name, filter=filter_expr, 
+                                    output_fields=["json_field", default_primary_key_field_name])
+                
+                # Extract actual IDs from results
+                actual_ids = sorted([item[default_primary_key_field_name] for item in res])
+                expected_ids_sorted = sorted(expected_ids)
+                
+                log.info(f"Modulo {modulo} remainder {remainder}: expected {expected_count}, got {len(res)} results")
+                
+                # Verify we got exactly the expected results
+                assert len(res) == expected_count, \
+                    f"Expected {expected_count} results for % {modulo} == {remainder}, got {len(res)}"
+                assert actual_ids == expected_ids_sorted, \
+                    f"Result IDs don't match expected IDs for % {modulo} == {remainder}"
+                
+                # Also verify each result is correct
+                for item in res:
+                    actual_remainder = item["json_field"]["number"] % modulo
+                    assert actual_remainder == remainder, \
+                        f"Number {item['json_field']['number']} % {modulo} = {actual_remainder}, expected {remainder}"
+                
+                # Test nested field
+                expected_nested_ids = [i for i, num in enumerate(all_nested_numbers) if num % modulo == remainder]
+                nested_filter = f'json_field["data"]["nested_number"] % {modulo} == {remainder}'
+                nested_res, _ = self.query(client, collection_name, filter=nested_filter,
+                                          output_fields=["json_field", default_primary_key_field_name])
+                
+                actual_nested_ids = sorted([item[default_primary_key_field_name] for item in nested_res])
+                expected_nested_ids_sorted = sorted(expected_nested_ids)
+                
+                assert len(nested_res) == len(expected_nested_ids), \
+                    f"Expected {len(expected_nested_ids)} nested results for % {modulo} == {remainder}, got {len(nested_res)}"
+                assert actual_nested_ids == expected_nested_ids_sorted, \
+                    f"Nested result IDs don't match expected IDs for % {modulo} == {remainder}"
+        
+        
+        # Test combining modulo with other conditions
+        combined_expr = 'json_field["number"] % 2 == 0 && json_field["index"] < 100'
+        expected_combined = [i for i in range(min(100, nb)) if all_numbers[i] % 2 == 0]
+        res_combined, _ = self.query(client, collection_name, filter=combined_expr,
+                                    output_fields=["json_field", default_primary_key_field_name])
+        actual_combined_ids = sorted([item[default_primary_key_field_name] for item in res_combined])
+        
+        assert len(res_combined) == len(expected_combined), \
+            f"Expected {len(expected_combined)} results for combined filter, got {len(res_combined)}"
+        assert actual_combined_ids == sorted(expected_combined), \
+            "Results for combined filter don't match expected"
+        
+        log.info(f"All modulo tests passed! Combined filter returned {len(res_combined)} results")
+        
+        self.drop_collection(client, collection_name)


 class TestMilvusClientGetInvalid(TestMilvusClientV2Base):