mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
fix: fix json_contains(path, int) bug (#44814)
#44816 Signed-off-by: luzhang <luzhang@zilliz.com> Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
parent
df6a4dc1a0
commit
2f178f810f
@ -15,6 +15,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "JsonContainsExpr.h"
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include "common/Types.h"
|
||||
|
||||
@ -339,6 +340,17 @@ PhyJsonContainsFilterExpr::ExecJsonContains(EvalCtx& context) {
|
||||
for (auto&& it : array) {
|
||||
auto val = it.template get<GetType>();
|
||||
if (val.error()) {
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
auto double_val = it.template get<double>();
|
||||
if (!double_val.error() &&
|
||||
double_val.value() ==
|
||||
std::floor(double_val.value())) {
|
||||
if (elements->In(static_cast<int64_t>(
|
||||
double_val.value())) > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (elements->In(val.value()) > 0) {
|
||||
@ -843,6 +855,18 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAll(EvalCtx& context) {
|
||||
for (auto&& it : array) {
|
||||
auto val = it.template get<GetType>();
|
||||
if (val.error()) {
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
auto double_val = it.template get<double>();
|
||||
if (!double_val.error() &&
|
||||
double_val.value() ==
|
||||
std::floor(double_val.value())) {
|
||||
tmp_elements.erase(
|
||||
static_cast<int64_t>(double_val.value()));
|
||||
if (tmp_elements.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
tmp_elements.erase(val.value());
|
||||
@ -965,6 +989,22 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllByStats() {
|
||||
auto value = milvus::BsonView::GetValueFromBsonView<GetType>(
|
||||
element.get_value());
|
||||
if (!value.has_value()) {
|
||||
if constexpr (std::is_same_v<GetType, int64_t>) {
|
||||
auto double_value =
|
||||
milvus::BsonView::GetValueFromBsonView<double>(
|
||||
element.get_value());
|
||||
if (double_value.has_value()) {
|
||||
if (double_value.value() ==
|
||||
std::floor(double_value.value())) {
|
||||
tmp_elements.erase(
|
||||
static_cast<int64_t>(double_value.value()));
|
||||
}
|
||||
if (tmp_elements.size() == 0) {
|
||||
res_view[row_offset] = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
tmp_elements.erase(value.value());
|
||||
@ -1058,6 +1098,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffType(EvalCtx& context) {
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
auto val = it.template get<int64_t>();
|
||||
if (val.error()) {
|
||||
auto double_val = it.template get<double>();
|
||||
if (!double_val.error() &&
|
||||
double_val.value() == element.int64_val()) {
|
||||
tmp_elements_index.erase(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.int64_val()) {
|
||||
@ -1237,8 +1282,9 @@ PhyJsonContainsFilterExpr::ExecJsonContainsAllWithDiffTypeByStats() {
|
||||
break;
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
// get double/int64 from bson
|
||||
auto val =
|
||||
milvus::BsonView::GetValueFromBsonView<int64_t>(
|
||||
milvus::BsonView::GetValueFromBsonView<double>(
|
||||
sub_value.get_value());
|
||||
if (!val.has_value()) {
|
||||
continue;
|
||||
@ -1588,6 +1634,11 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffType(EvalCtx& context) {
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
auto val = it.template get<int64_t>();
|
||||
if (val.error()) {
|
||||
auto double_val = it.template get<double>();
|
||||
if (!double_val.error() &&
|
||||
double_val.value() == element.int64_val()) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (val.value() == element.int64_val()) {
|
||||
@ -1751,7 +1802,7 @@ PhyJsonContainsFilterExpr::ExecJsonContainsWithDiffTypeByStats() {
|
||||
}
|
||||
case proto::plan::GenericValue::kInt64Val: {
|
||||
auto val =
|
||||
milvus::BsonView::GetValueFromBsonView<int64_t>(
|
||||
milvus::BsonView::GetValueFromBsonView<double>(
|
||||
sub_value.get_value());
|
||||
if (!val.has_value()) {
|
||||
continue;
|
||||
|
||||
@ -2610,40 +2610,52 @@ def gen_json_field_expressions_and_templates():
|
||||
return expressions
|
||||
|
||||
|
||||
def gen_json_field_expressions_all_single_operator():
|
||||
def gen_json_field_expressions_all_single_operator(json_cast_type=None):
|
||||
"""
|
||||
Generate a list of filter expressions (each expression is a string)
|
||||
:param json_cast_type: Optional parameter to specify the JSON cast type (e.g., "ARRAY_DOUBLE")
|
||||
"""
|
||||
expressions = ["json_field['a'] <= 1", "json_field['a'] <= 1.0", "json_field['a'] >= 1", "json_field['a'] >= 1.0",
|
||||
"json_field['a'] < 2", "json_field['a'] < 2.0", "json_field['a'] > 0", "json_field['a'] > 0.0",
|
||||
"json_field['a'] <= '1'", "json_field['a'] >= '1'", "json_field['a'] < '2'", "json_field['a'] > '0'",
|
||||
"json_field['a'] == 1", "json_field['a'] == 1.0", "json_field['a'] == True",
|
||||
"json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992",
|
||||
"json_field['a'] == '1'",
|
||||
"json_field['a'] != '1'", "json_field['a'] like '1%'", "json_field['a'] like '%1'",
|
||||
"json_field['a'] like '%1%'", "json_field['a'] LIKE '1%'", "json_field['a'] LIKE '%1'",
|
||||
"json_field['a'] LIKE '%1%'", "EXISTS json_field['a']", "exists json_field['a']",
|
||||
"EXISTS json_field['a']['b']", "exists json_field['a']['b']", "json_field['a'] + 1 >= 2",
|
||||
"json_field['a'] - 1 <= 0", "json_field['a'] + 1.0 >= 2", "json_field['a'] - 1.0 <= 0",
|
||||
"json_field['a'] * 2 == 2", "json_field['a'] * 1.0 == 1.0", "json_field / 1 == 1",
|
||||
"json_field['a'] / 1.0 == 1", "json_field['a'] % 10 == 1", "json_field['a'] == 1**2",
|
||||
"json_field['a'][0] == 1 && json_field['a'][1] == 2",
|
||||
"json_field['a'][0] == 1 and json_field['a'][1] == 2",
|
||||
"json_field['a'][0]['b'] >=1 && json_field['a'][2] == 3",
|
||||
"json_field['a'][0]['b'] >=1 and json_field['a'][2] == 3",
|
||||
"json_field['a'] == 1 || json_field['a'] == '1'", "json_field['a'] == 1 or json_field['a'] == '1'",
|
||||
"json_field['a'][0]['b'] >=1 || json_field['a']['b'] >=1",
|
||||
"json_field['a'][0]['b'] >=1 or json_field['a']['b'] >=1",
|
||||
"json_field['a'] in [1]", "json_contains(json_field['a'], 1)", "JSON_CONTAINS(json_field['a'], 1)",
|
||||
"json_contains_all(json_field['a'], [2.0, '4'])", "JSON_CONTAINS_ALL(json_field['a'], [2.0, '4'])",
|
||||
"json_contains_any(json_field['a'], [2.0, '4'])", "JSON_CONTAINS_ANY(json_field['a'], [2.0, '4'])",
|
||||
"array_contains(json_field['a'], 2)", "ARRAY_CONTAINS(json_field['a'], 2)",
|
||||
"array_contains_all(json_field['a'], [1.0, 2])", "ARRAY_CONTAINS_ALL(json_field['a'], [1.0, 2])",
|
||||
"array_contains_any(json_field['a'], [1.0, 2])", "ARRAY_CONTAINS_ANY(json_field['a'], [1.0, 2])",
|
||||
"array_length(json_field['a']) < 10", "ARRAY_LENGTH(json_field['a']) < 10",
|
||||
"json_field is null", "json_field IS NULL", "json_field is not null", "json_field IS NOT NULL",
|
||||
"json_field['a'] is null", "json_field['a'] IS NULL", "json_field['a'] is not null", "json_field['a'] IS NOT NULL"
|
||||
]
|
||||
if json_cast_type == "ARRAY_DOUBLE":
|
||||
# For ARRAY_DOUBLE type, use array-specific expressions
|
||||
expressions = [
|
||||
"json_contains(json_field['a'], 1)", "JSON_CONTAINS(json_field['a'], 1)",
|
||||
"json_contains(json_field['a'], 1.0)", "json_contains(json_field['a'], 2)",
|
||||
"json_contains_all(json_field['a'], [1, 2])", "JSON_CONTAINS_ALL(json_field['a'], [1, 2])",
|
||||
"json_contains_all(json_field['a'], [1.0, 2.0])", "json_contains_all(json_field['a'], [2, 4])",
|
||||
"json_contains_any(json_field['a'], [1, 2])", "JSON_CONTAINS_ANY(json_field['a'], [1, 2])",
|
||||
"json_contains_any(json_field['a'], [1.0, 2.0])", "json_contains_any(json_field['a'], [2, 4])",
|
||||
"array_contains(json_field['a'], 1)", "ARRAY_CONTAINS(json_field['a'], 1)",
|
||||
"array_contains(json_field['a'], 1.0)", "array_contains(json_field['a'], 2)",
|
||||
"array_contains_all(json_field['a'], [1, 2])", "ARRAY_CONTAINS_ALL(json_field['a'], [1, 2])",
|
||||
"array_contains_all(json_field['a'], [1.0, 2.0])", "array_contains_all(json_field['a'], [2, 4])",
|
||||
"array_contains_any(json_field['a'], [1, 2])", "ARRAY_CONTAINS_ANY(json_field['a'], [1, 2])",
|
||||
"array_contains_any(json_field['a'], [1.0, 2.0])", "array_contains_any(json_field['a'], [2, 4])",
|
||||
"array_length(json_field['a']) < 10", "ARRAY_LENGTH(json_field['a']) < 10"
|
||||
]
|
||||
else:
|
||||
expressions = ["json_field['a'] <= 1", "json_field['a'] <= 1.0", "json_field['a'] >= 1", "json_field['a'] >= 1.0",
|
||||
"json_field['a'] < 2", "json_field['a'] < 2.0", "json_field['a'] > 0", "json_field['a'] > 0.0",
|
||||
"json_field['a'] <= '1'", "json_field['a'] >= '1'", "json_field['a'] < '2'", "json_field['a'] > '0'",
|
||||
"json_field['a'] == 1", "json_field['a'] == 1.0", "json_field['a'] == True",
|
||||
"json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992",
|
||||
"json_field['a'] == '1'",
|
||||
"json_field['a'] != '1'", "json_field['a'] like '1%'", "json_field['a'] like '%1'",
|
||||
"json_field['a'] like '%1%'", "json_field['a'] LIKE '1%'", "json_field['a'] LIKE '%1'",
|
||||
"json_field['a'] LIKE '%1%'", "EXISTS json_field['a']", "exists json_field['a']",
|
||||
"EXISTS json_field['a']['b']", "exists json_field['a']['b']", "json_field['a'] + 1 >= 2",
|
||||
"json_field['a'] - 1 <= 0", "json_field['a'] + 1.0 >= 2", "json_field['a'] - 1.0 <= 0",
|
||||
"json_field['a'] * 2 == 2", "json_field['a'] * 1.0 == 1.0", "json_field / 1 == 1",
|
||||
"json_field['a'] / 1.0 == 1", "json_field['a'] % 10 == 1", "json_field['a'] == 1**2",
|
||||
"json_field['a'][0] == 1 && json_field['a'][1] == 2",
|
||||
"json_field['a'][0] == 1 and json_field['a'][1] == 2",
|
||||
"json_field['a'][0]['b'] >=1 && json_field['a'][2] == 3",
|
||||
"json_field['a'][0]['b'] >=1 and json_field['a'][2] == 3",
|
||||
"json_field['a'] == 1 || json_field['a'] == '1'", "json_field['a'] == 1 or json_field['a'] == '1'",
|
||||
"json_field['a'][0]['b'] >=1 || json_field['a']['b'] >=1",
|
||||
"json_field['a'][0]['b'] >=1 or json_field['a']['b'] >=1",
|
||||
"json_field['a'] in [1]", "json_field is null", "json_field IS NULL", "json_field is not null", "json_field IS NOT NULL",
|
||||
"json_field['a'] is null", "json_field['a'] IS NULL", "json_field['a'] is not null", "json_field['a'] IS NOT NULL"
|
||||
]
|
||||
|
||||
return expressions
|
||||
|
||||
|
||||
@ -4104,8 +4104,8 @@ class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
|
||||
def supported_varchar_scalar_index(self, request):
|
||||
yield request.param
|
||||
|
||||
# @pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "json"", "bool"])
|
||||
@pytest.fixture(scope="function", params=["DOUBLE"])
|
||||
# @pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "json", "bool", "ARRAY_DOUBLE"])
|
||||
@pytest.fixture(scope="function", params=["DOUBLE", "ARRAY_DOUBLE"])
|
||||
def supported_json_cast_type(self, request):
|
||||
yield request.param
|
||||
|
||||
@ -4167,10 +4167,11 @@ class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
|
||||
# 3. flush if specified
|
||||
if is_flush:
|
||||
self.flush(client, collection_name)
|
||||
time.sleep(300)
|
||||
# 4. query when there is no json path index under all expressions
|
||||
# skip negative expression for issue 40685
|
||||
# "my_json['a'] != 1", "my_json['a'] != 1.0", "my_json['a'] != '1'", "my_json['a'] != 1.1", "my_json['a'] not in [1]"
|
||||
express_list = cf.gen_json_field_expressions_all_single_operator()
|
||||
express_list = cf.gen_json_field_expressions_all_single_operator(supported_json_cast_type)
|
||||
compare_dict = {}
|
||||
for i in range(len(express_list)):
|
||||
json_list = []
|
||||
@ -4195,11 +4196,15 @@ class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
|
||||
# 6. prepare index params with json path index
|
||||
index_name = "json_index"
|
||||
index_params = self.prepare_index_params(client)[0]
|
||||
json_path_list = [f"{json_field_name}", f"{json_field_name}[0]", f"{json_field_name}[1]",
|
||||
f"{json_field_name}[6]", f"{json_field_name}['a']", f"{json_field_name}['a']['b']",
|
||||
f"{json_field_name}['a'][0]", f"{json_field_name}['a'][6]", f"{json_field_name}['a'][0]['b']",
|
||||
f"{json_field_name}['a']['b']['c']", f"{json_field_name}['a']['b'][0]['d']",
|
||||
f"{json_field_name}[10000]", f"{json_field_name}['a']['c'][0]['d']"]
|
||||
if supported_json_cast_type == "ARRAY_DOUBLE":
|
||||
# For ARRAY_DOUBLE type, use array paths
|
||||
json_path_list = [f"{json_field_name}['a']"]
|
||||
else:
|
||||
json_path_list = [f"{json_field_name}", f"{json_field_name}[0]", f"{json_field_name}[1]",
|
||||
f"{json_field_name}[6]", f"{json_field_name}['a']", f"{json_field_name}['a']['b']",
|
||||
f"{json_field_name}['a'][0]", f"{json_field_name}['a'][6]", f"{json_field_name}['a'][0]['b']",
|
||||
f"{json_field_name}['a']['b']['c']", f"{json_field_name}['a']['b'][0]['d']",
|
||||
f"{json_field_name}[10000]", f"{json_field_name}['a']['c'][0]['d']"]
|
||||
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
|
||||
for i in range(len(json_path_list)):
|
||||
index_params.add_index(field_name=json_field_name, index_name=index_name + f'{i}',
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user