// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "BinaryRangeExpr.h"

#include <utility>

#include "query/Utils.h"

namespace milvus {
namespace exec {

void
PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
    auto input = context.get_offset_input();
    SetHasOffsetInput((input != nullptr));
    switch (expr_->column_.data_type_) {
        case DataType::BOOL: {
            result = ExecRangeVisitorImpl<bool>(context);
            break;
        }
        case DataType::INT8: {
            result = ExecRangeVisitorImpl<int8_t>(context);
            break;
        }
        case DataType::INT16: {
            result = ExecRangeVisitorImpl<int16_t>(context);
            break;
        }
        case DataType::INT32: {
            result = ExecRangeVisitorImpl<int32_t>(context);
            break;
        }
        case DataType::INT64: {
            result = ExecRangeVisitorImpl<int64_t>(context);
            break;
        }
        case DataType::FLOAT: {
            result = ExecRangeVisitorImpl<float>(context);
            break;
        }
        case DataType::DOUBLE: {
            result = ExecRangeVisitorImpl<double>(context);
            break;
        }
        case DataType::VARCHAR: {
            if (segment_->type() == SegmentType::Growing &&
                !storage::MmapManager::GetInstance()
                     .GetMmapConfig()
                     .growing_enable_mmap) {
                result = ExecRangeVisitorImpl<std::string>(context);
            } else {
                result = ExecRangeVisitorImpl<std::string_view>(context);
            }
            break;
        }
        case DataType::JSON: {
            auto value_type = expr_->lower_val_.val_case();
            if (is_index_mode_ && !has_offset_input_) {
                switch (value_type) {
                    case proto::plan::GenericValue::ValCase::kInt64Val: {
                        // Integer bounds are widened to double before being
                        // handed to the JSON index path.
                        proto::plan::GenericValue double_lower_val;
                        double_lower_val.set_float_val(static_cast<double>(
                            expr_->lower_val_.int64_val()));
                        proto::plan::GenericValue double_upper_val;
                        double_upper_val.set_float_val(static_cast<double>(
                            expr_->upper_val_.int64_val()));
                        lower_arg_.SetValue<double>(double_lower_val);
                        upper_arg_.SetValue<double>(double_upper_val);
                        arg_inited_ = true;
                        result = ExecRangeVisitorImplForIndex<double>();
                        break;
                    }
                    case proto::plan::GenericValue::ValCase::kFloatVal: {
                        result = ExecRangeVisitorImplForIndex<double>();
                        break;
                    }
                    case proto::plan::GenericValue::ValCase::kStringVal: {
                        result =
                            ExecRangeVisitorImplForJson<std::string>(context);
                        break;
                    }
                    default: {
                        PanicInfo(
                            DataTypeInvalid,
                            fmt::format(
                                "unsupported value type {} in expression",
                                value_type));
                    }
                }
            } else {
                switch (value_type) {
                    case proto::plan::GenericValue::ValCase::kInt64Val: {
                        result = ExecRangeVisitorImplForJson<int64_t>(context);
                        break;
                    }
                    case proto::plan::GenericValue::ValCase::kFloatVal: {
                        result = ExecRangeVisitorImplForJson<double>(context);
                        break;
                    }
                    case proto::plan::GenericValue::ValCase::kStringVal: {
                        result =
                            ExecRangeVisitorImplForJson<std::string>(context);
                        break;
                    }
                    default: {
                        PanicInfo(
                            DataTypeInvalid,
                            fmt::format(
                                "unsupported value type {} in expression",
                                value_type));
                    }
                }
            }
            break;
        }
        case DataType::ARRAY: {
            auto value_type = expr_->lower_val_.val_case();
            switch (value_type) {
                case proto::plan::GenericValue::ValCase::kInt64Val: {
                    SetNotUseIndex();
                    result = ExecRangeVisitorImplForArray<int64_t>(context);
                    break;
                }
                case proto::plan::GenericValue::ValCase::kFloatVal: {
                    SetNotUseIndex();
                    result = ExecRangeVisitorImplForArray<double>(context);
                    break;
                }
                case proto::plan::GenericValue::ValCase::kStringVal: {
                    SetNotUseIndex();
                    result = ExecRangeVisitorImplForArray<std::string>(context);
                    break;
                }
                default: {
                    PanicInfo(
                        DataTypeInvalid,
                        fmt::format("unsupported value type {} in expression",
                                    value_type));
                }
            }
            break;
        }
        default:
            PanicInfo(DataTypeInvalid,
                      "unsupported data type: {}",
                      expr_->column_.data_type_);
    }
}
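
// Dispatch helper: the scalar-index path is taken only when the expression
// runs in index mode and no offset input is supplied; otherwise the range is
// evaluated directly against the raw column data, batch by batch.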
template <typename T>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl(EvalCtx& context) {
    if (is_index_mode_ && !has_offset_input_) {
        return ExecRangeVisitorImplForIndex<T>();
    } else {
        return ExecRangeVisitorImplForData<T>(context);
    }
}

template <typename T, typename IndexInnerType, typename HighPrecisionType>
ColumnVectorPtr
PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
                                           HighPrecisionType& val2,
                                           bool& lower_inclusive,
                                           bool& upper_inclusive,
                                           OffsetVector* input) {
    lower_inclusive = expr_->lower_inclusive_;
    upper_inclusive = expr_->upper_inclusive_;
    if (!arg_inited_) {
        lower_arg_.SetValue<HighPrecisionType>(expr_->lower_val_);
        upper_arg_.SetValue<HighPrecisionType>(expr_->upper_val_);
        arg_inited_ = true;
    }
    val1 = lower_arg_.GetValue<HighPrecisionType>();
    val2 = upper_arg_.GetValue<HighPrecisionType>();

    // Returns an all-false batch (only validity is computed) when the range
    // cannot match any value of T.
    auto get_next_overflow_batch =
        [this](OffsetVector* input) -> ColumnVectorPtr {
        int64_t batch_size;
        if (input != nullptr) {
            batch_size = input->size();
        } else {
            batch_size = overflow_check_pos_ + batch_size_ >= active_count_
                             ? active_count_ - overflow_check_pos_
                             : batch_size_;
            overflow_check_pos_ += batch_size;
        }
        auto valid_res =
            (input != nullptr)
                ? ProcessChunksForValidByOffsets<T>(is_index_mode_, *input)
                : ProcessChunksForValid<T>(is_index_mode_);
        auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
                                                      std::move(valid_res));
        return res_vec;
    };

    if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
        if (milvus::query::gt_ub<T>(val1)) {
            return get_next_overflow_batch(input);
        } else if (milvus::query::lt_lb<T>(val1)) {
            val1 = std::numeric_limits<T>::min();
            lower_inclusive = true;
        }
        if (milvus::query::gt_ub<T>(val2)) {
            val2 = std::numeric_limits<T>::max();
            upper_inclusive = true;
        } else if (milvus::query::lt_lb<T>(val2)) {
            return get_next_overflow_batch(input);
        }
    }
    return nullptr;
}

template <typename T>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() {
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
    using Index = index::ScalarIndex<IndexInnerType>;
    typedef std::conditional_t<std::is_integral_v<IndexInnerType> &&
                                   !std::is_same_v<T, bool>,
                               int64_t,
                               IndexInnerType>
        HighPrecisionType;
    HighPrecisionType val1;
    HighPrecisionType val2;
    bool lower_inclusive = false;
    bool upper_inclusive = false;
    if (auto res =
            PreCheckOverflow<T>(val1, val2, lower_inclusive, upper_inclusive)) {
        return res;
    }

    auto real_batch_size = GetNextBatchSize();
    if (real_batch_size == 0) {
        return nullptr;
    }

    auto execute_sub_batch = [lower_inclusive, upper_inclusive](
                                 Index* index_ptr,
                                 HighPrecisionType val1,
                                 HighPrecisionType val2) {
        BinaryRangeIndexFunc<T> func;
        return std::move(
            func(index_ptr, val1, val2, lower_inclusive, upper_inclusive));
    };
    auto res = ProcessIndexChunks<T>(execute_sub_batch, val1, val2);
    AssertInfo(res->size() == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               res->size(),
               real_batch_size);
    return res;
}
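
// Raw-data path: builds the result bitmap batch by batch, lets the skip index
// prune chunks that cannot contain a match for [val1, val2], and masks out
// rows whose validity bit is unset.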
template <typename T>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(EvalCtx& context) {
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
    using Index = index::ScalarIndex<IndexInnerType>;
    typedef std::conditional_t<std::is_integral_v<IndexInnerType> &&
                                   !std::is_same_v<T, bool>,
                               int64_t,
                               IndexInnerType>
        HighPrecisionType;
    const auto& bitmap_input = context.get_bitmap_input();
    auto* input = context.get_offset_input();
    HighPrecisionType val1;
    HighPrecisionType val2;
    bool lower_inclusive = false;
    bool upper_inclusive = false;
    if (auto res = PreCheckOverflow<T>(
            val1, val2, lower_inclusive, upper_inclusive, input)) {
        return res;
    }

    auto real_batch_size =
        has_offset_input_ ? input->size() : GetNextBatchSize();
    if (real_batch_size == 0) {
        return nullptr;
    }
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
                                       TargetBitmap(real_batch_size, true));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);

    size_t processed_cursor = 0;
    auto execute_sub_batch =
        [lower_inclusive, upper_inclusive, &processed_cursor, &bitmap_input]
        <FilterType filter_type = FilterType::sequential>(
            const T* data,
            const bool* valid_data,
            const int32_t* offsets,
            const int size,
            TargetBitmapView res,
            TargetBitmapView valid_res,
            HighPrecisionType val1,
            HighPrecisionType val2) {
        if (lower_inclusive && upper_inclusive) {
            BinaryRangeElementFunc<T, true, true, filter_type> func;
            func(val1, val2, data, size, res, bitmap_input, processed_cursor,
                 offsets);
        } else if (lower_inclusive && !upper_inclusive) {
            BinaryRangeElementFunc<T, true, false, filter_type> func;
            func(val1, val2, data, size, res, bitmap_input, processed_cursor,
                 offsets);
        } else if (!lower_inclusive && upper_inclusive) {
            BinaryRangeElementFunc<T, false, true, filter_type> func;
            func(val1, val2, data, size, res, bitmap_input, processed_cursor,
                 offsets);
        } else {
            BinaryRangeElementFunc<T, false, false, filter_type> func;
            func(val1, val2, data, size, res, bitmap_input, processed_cursor,
                 offsets);
        }
        // BinaryRangeElementFunc already processes the whole sub-batch, so the
        // data is not split again here (splitting could hurt performance when
        // the null distribution is scattered); instead, res is masked with
        // valid_data after the batch operation.
        if (valid_data != nullptr) {
            for (int i = 0; i < size; i++) {
                auto offset = i;
                if constexpr (filter_type == FilterType::random) {
                    offset = (offsets) ? offsets[i] : i;
                }
                if (!valid_data[offset]) {
                    res[i] = valid_res[i] = false;
                }
            }
        }
        processed_cursor += size;
    };

    auto skip_index_func = [val1, val2, lower_inclusive, upper_inclusive](
                               const SkipIndex& skip_index,
                               FieldId field_id,
                               int64_t chunk_id) {
        if (lower_inclusive && upper_inclusive) {
            return skip_index.CanSkipBinaryRange<T>(
                field_id, chunk_id, val1, val2, true, true);
        } else if (lower_inclusive && !upper_inclusive) {
            return skip_index.CanSkipBinaryRange<T>(
                field_id, chunk_id, val1, val2, true, false);
        } else if (!lower_inclusive && upper_inclusive) {
            return skip_index.CanSkipBinaryRange<T>(
                field_id, chunk_id, val1, val2, false, true);
        } else {
            return skip_index.CanSkipBinaryRange<T>(
                field_id, chunk_id, val1, val2, false, false);
        }
    };

    int64_t processed_size;
    if (has_offset_input_) {
        processed_size = ProcessDataByOffsets<T>(execute_sub_batch,
                                                 skip_index_func,
                                                 input,
                                                 res,
                                                 valid_res,
                                                 val1,
                                                 val2);
    } else {
        processed_size = ProcessDataChunks<T>(
            execute_sub_batch, skip_index_func, res, valid_res, val1, val2);
    }
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               processed_size,
               real_batch_size);
    return res_vec;
}
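
// Brute-force JSON path: reads the value at the JSON pointer built from the
// column's nested path and compares it against the range bounds. When a JSON
// key index is usable and there is no offset input, the work is delegated to
// ExecRangeVisitorImplForJsonForIndex instead.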
template <typename ValueType>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(EvalCtx& context) {
    using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
                                       std::string_view,
                                       ValueType>;
    const auto& bitmap_input = context.get_bitmap_input();
    auto* input = context.get_offset_input();
    FieldId field_id = expr_->column_.field_id_;
    if (CanUseJsonKeyIndex(field_id) && !has_offset_input_) {
        return ExecRangeVisitorImplForJsonForIndex<ValueType>();
    }

    auto real_batch_size =
        has_offset_input_ ? input->size() : GetNextBatchSize();
    if (real_batch_size == 0) {
        return nullptr;
    }
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
                                       TargetBitmap(real_batch_size, true));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);

    bool lower_inclusive = expr_->lower_inclusive_;
    bool upper_inclusive = expr_->upper_inclusive_;
    if (!arg_inited_) {
        lower_arg_.SetValue<ValueType>(expr_->lower_val_);
        upper_arg_.SetValue<ValueType>(expr_->upper_val_);
        arg_inited_ = true;
    }
    ValueType val1 = lower_arg_.GetValue<ValueType>();
    ValueType val2 = upper_arg_.GetValue<ValueType>();
    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

    size_t processed_cursor = 0;
    auto execute_sub_batch =
        [lower_inclusive, upper_inclusive, pointer, &bitmap_input,
         &processed_cursor]
        <FilterType filter_type = FilterType::sequential>(
            const milvus::Json* data,
            const bool* valid_data,
            const int32_t* offsets,
            const int size,
            TargetBitmapView res,
            TargetBitmapView valid_res,
            ValueType val1,
            ValueType val2) {
        if (lower_inclusive && upper_inclusive) {
            BinaryRangeElementFuncForJson<ValueType, true, true, filter_type>
                func;
            func(val1, val2, pointer, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        } else if (lower_inclusive && !upper_inclusive) {
            BinaryRangeElementFuncForJson<ValueType, true, false, filter_type>
                func;
            func(val1, val2, pointer, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        } else if (!lower_inclusive && upper_inclusive) {
            BinaryRangeElementFuncForJson<ValueType, false, true, filter_type>
                func;
            func(val1, val2, pointer, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        } else {
            BinaryRangeElementFuncForJson<ValueType, false, false, filter_type>
                func;
            func(val1, val2, pointer, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        }
        processed_cursor += size;
    };

    int64_t processed_size;
    if (has_offset_input_) {
        processed_size = ProcessDataByOffsets<milvus::Json>(execute_sub_batch,
                                                            std::nullptr_t{},
                                                            input,
                                                            res,
                                                            valid_res,
                                                            val1,
                                                            val2);
    } else {
        processed_size = ProcessDataChunks<milvus::Json>(
            execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2);
    }
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               processed_size,
               real_batch_size);
    return res_vec;
}
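
// JSON-key-index path: the whole segment is filtered once through the JSON key
// index and the resulting bitmap is cached in cached_index_chunk_res_, so each
// subsequent batch is served by slicing the cached result.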
template <typename ValueType>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonForIndex() {
    using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
                                       std::string_view,
                                       ValueType>;
    auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
                               ? active_count_ - current_data_chunk_pos_
                               : batch_size_;
    auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

#define BinaryRangeJSONIndexCompare(cmp)                      \
    do {                                                      \
        auto val = json.at<GetType>(offset, size);            \
        if (val.error()) {                                    \
            if constexpr (std::is_same_v<GetType, int64_t>) { \
                auto val = json.at<double>(offset, size);     \
                return !val.error() && (cmp);                 \
            }                                                 \
            return false;                                     \
        }                                                     \
        return (cmp);                                         \
    } while (false)

#define BinaryRangeJSONTypeCompare(cmp)                                    \
    do {                                                                   \
        if constexpr (std::is_same_v<GetType, std::string_view>) {         \
            if (type == uint8_t(milvus::index::JSONType::STRING)) {        \
                auto val = json.at_string(offset, size);                   \
                return (cmp);                                              \
            } else {                                                       \
                return false;                                              \
            }                                                              \
        } else if constexpr (std::is_same_v<GetType, int64_t>) {           \
            if (type == uint8_t(milvus::index::JSONType::INT64)) {         \
                auto val =                                                 \
                    std::stoll(std::string(json.at_string(offset, size))); \
                return (cmp);                                              \
            } else if (type == uint8_t(milvus::index::JSONType::DOUBLE)) { \
                auto val =                                                 \
                    std::stod(std::string(json.at_string(offset, size)));  \
                return (cmp);                                              \
            } else {                                                       \
                return false;                                              \
            }                                                              \
        } else if constexpr (std::is_same_v<GetType, double>) {            \
            if (type == uint8_t(milvus::index::JSONType::INT64)) {         \
                auto val =                                                 \
                    std::stoll(std::string(json.at_string(offset, size))); \
                return (cmp);                                              \
            } else if (type == uint8_t(milvus::index::JSONType::DOUBLE)) { \
                auto val =                                                 \
                    std::stod(std::string(json.at_string(offset, size)));  \
                return (cmp);                                              \
            } else {                                                       \
                return false;                                              \
            }                                                              \
        }                                                                  \
    } while (false)

#define BinaryRangeJSONTypeCompareWithValue(cmp)                   \
    do {                                                           \
        if constexpr (std::is_same_v<GetType, int64_t>) {          \
            if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
                float val = *reinterpret_cast<float*>(&value);     \
                return (cmp);                                      \
            } else {                                               \
                int64_t val = value;                               \
                return (cmp);                                      \
            }                                                      \
        } else if constexpr (std::is_same_v<GetType, double>) {    \
            if (type == uint8_t(milvus::index::JSONType::FLOAT)) { \
                float val = *reinterpret_cast<float*>(&value);     \
                return (cmp);                                      \
            } else {                                               \
                int64_t val = value;                               \
                return (cmp);                                      \
            }                                                      \
        } else if constexpr (std::is_same_v<GetType, bool>) {      \
            bool val = *reinterpret_cast<bool*>(&value);           \
            return (cmp);                                          \
        }                                                          \
    } while (false)

    bool lower_inclusive = expr_->lower_inclusive_;
    bool upper_inclusive = expr_->upper_inclusive_;
    ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
    ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
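
    // cached_index_chunk_id_ != 0 means the segment-wide bitmap has not been
    // materialized for this expression yet; compute it once through the key
    // index and cache it for the following batches.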
    if (cached_index_chunk_id_ != 0) {
        const segcore::SegmentInternalInterface* segment = nullptr;
        if (segment_->type() == SegmentType::Growing) {
            segment =
                dynamic_cast<const segcore::SegmentGrowingImpl*>(segment_);
        } else if (segment_->type() == SegmentType::Sealed) {
            segment = dynamic_cast<const segcore::SegmentSealed*>(segment_);
        }
        auto field_id = expr_->column_.field_id_;
        auto* index = segment->GetJsonKeyIndex(field_id);
        Assert(index != nullptr);
        auto filter_func = [segment,
                            &field_id,
                            val1,
                            val2,
                            lower_inclusive,
                            upper_inclusive](bool valid,
                                             uint8_t type,
                                             uint32_t row_id,
                                             uint16_t offset,
                                             uint16_t size,
                                             int32_t value) {
            if (valid) {
                // The comparison uses the value stored in the index entry
                // itself; reject entries whose stored JSON type cannot match
                // ValueType.
                if constexpr (std::is_same_v<GetType, int64_t>) {
                    if (type != uint8_t(milvus::index::JSONType::INT32) &&
                        type != uint8_t(milvus::index::JSONType::INT64) &&
                        type != uint8_t(milvus::index::JSONType::FLOAT) &&
                        type != uint8_t(milvus::index::JSONType::DOUBLE)) {
                        return false;
                    }
                } else if constexpr (std::is_same_v<GetType,
                                                    std::string_view>) {
                    if (type != uint8_t(milvus::index::JSONType::STRING) &&
                        type !=
                            uint8_t(milvus::index::JSONType::STRING_ESCAPE)) {
                        return false;
                    }
                } else if constexpr (std::is_same_v<GetType, double>) {
                    if (type != uint8_t(milvus::index::JSONType::INT32) &&
                        type != uint8_t(milvus::index::JSONType::INT64) &&
                        type != uint8_t(milvus::index::JSONType::FLOAT) &&
                        type != uint8_t(milvus::index::JSONType::DOUBLE)) {
                        return false;
                    }
                } else if constexpr (std::is_same_v<GetType, bool>) {
                    if (type != uint8_t(milvus::index::JSONType::BOOL)) {
                        return false;
                    }
                }
                if (lower_inclusive && upper_inclusive) {
                    if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
                        BinaryRangeJSONTypeCompareWithValue(
                            static_cast<float>(val1) <= val &&
                            val <= static_cast<float>(val2));
                    } else {
                        BinaryRangeJSONTypeCompareWithValue(val1 <= val &&
                                                            val <= val2);
                    }
                } else if (lower_inclusive && !upper_inclusive) {
                    if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
                        BinaryRangeJSONTypeCompareWithValue(
                            static_cast<float>(val1) <= val &&
                            val < static_cast<float>(val2));
                    } else {
                        BinaryRangeJSONTypeCompareWithValue(val1 <= val &&
                                                            val < val2);
                    }
                } else if (!lower_inclusive && upper_inclusive) {
                    if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
                        BinaryRangeJSONTypeCompareWithValue(
                            static_cast<float>(val1) < val &&
                            val <= static_cast<float>(val2));
                    } else {
                        BinaryRangeJSONTypeCompareWithValue(val1 < val &&
                                                            val <= val2);
                    }
                } else {
                    if (type == uint8_t(milvus::index::JSONType::FLOAT)) {
                        BinaryRangeJSONTypeCompareWithValue(
                            static_cast<float>(val1) < val &&
                            val < static_cast<float>(val2));
                    } else {
                        BinaryRangeJSONTypeCompareWithValue(val1 < val &&
                                                            val < val2);
                    }
                }
            } else {
                // Fall back to fetching the raw JSON document for this row and
                // evaluating the range against it.
                auto json_pair = segment->GetJsonData(field_id, row_id);
                if (!json_pair.second) {
                    return false;
                }
                auto json = milvus::Json(json_pair.first.data(),
                                         json_pair.first.size());
                if (lower_inclusive && upper_inclusive) {
                    if (type == uint8_t(milvus::index::JSONType::STRING) ||
                        type == uint8_t(milvus::index::JSONType::DOUBLE) ||
                        type == uint8_t(milvus::index::JSONType::INT64)) {
                        BinaryRangeJSONTypeCompare(val1 <= val && val <= val2);
                    } else {
                        BinaryRangeJSONIndexCompare(
                            val1 <= ValueType(val.value()) &&
                            ValueType(val.value()) <= val2);
                    }
                } else if (lower_inclusive && !upper_inclusive) {
                    if (type == uint8_t(milvus::index::JSONType::STRING) ||
                        type == uint8_t(milvus::index::JSONType::DOUBLE) ||
                        type == uint8_t(milvus::index::JSONType::INT64)) {
                        BinaryRangeJSONTypeCompare(val1 <= val && val < val2);
                    } else {
                        BinaryRangeJSONIndexCompare(
                            val1 <= ValueType(val.value()) &&
                            ValueType(val.value()) < val2);
                    }
                } else if (!lower_inclusive && upper_inclusive) {
                    if (type == uint8_t(milvus::index::JSONType::STRING) ||
                        type == uint8_t(milvus::index::JSONType::DOUBLE) ||
                        type == uint8_t(milvus::index::JSONType::INT64)) {
                        BinaryRangeJSONTypeCompare(val1 < val && val <= val2);
                    } else {
                        BinaryRangeJSONIndexCompare(
                            val1 < ValueType(val.value()) &&
                            ValueType(val.value()) <= val2);
                    }
                } else {
                    if (type == uint8_t(milvus::index::JSONType::STRING) ||
                        type == uint8_t(milvus::index::JSONType::DOUBLE) ||
                        type == uint8_t(milvus::index::JSONType::INT64)) {
                        BinaryRangeJSONTypeCompare(val1 < val && val < val2);
                    } else {
                        BinaryRangeJSONIndexCompare(
                            val1 < ValueType(val.value()) &&
                            ValueType(val.value()) < val2);
                    }
                }
            }
        };
        bool is_growing = segment_->type() == SegmentType::Growing;
        bool is_strong_consistency = consistency_level_ == 0;
        cached_index_chunk_res_ = index
                                      ->FilterByPath(pointer,
                                                     active_count_,
                                                     is_growing,
                                                     is_strong_consistency,
                                                     filter_func)
                                      .clone();
        cached_index_chunk_id_ = 0;
    }

    TargetBitmap result;
    result.append(
        cached_index_chunk_res_, current_data_global_pos_, real_batch_size);
    MoveCursor();
    return std::make_shared<ColumnVector>(std::move(result),
                                          TargetBitmap(real_batch_size, true));
}
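
// Array path: the element to compare is addressed by the index parsed from the
// first entry of the column's nested path; the index stays -1 when no nested
// path is given.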
template <typename ValueType>
VectorPtr
PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(EvalCtx& context) {
    using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
                                       std::string_view,
                                       ValueType>;
    const auto& bitmap_input = context.get_bitmap_input();
    auto* input = context.get_offset_input();
    auto real_batch_size =
        has_offset_input_ ? input->size() : GetNextBatchSize();
    if (real_batch_size == 0) {
        return nullptr;
    }
    auto res_vec =
        std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false),
                                       TargetBitmap(real_batch_size, true));
    TargetBitmapView res(res_vec->GetRawData(), real_batch_size);
    TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);

    bool lower_inclusive = expr_->lower_inclusive_;
    bool upper_inclusive = expr_->upper_inclusive_;
    if (!arg_inited_) {
        lower_arg_.SetValue<ValueType>(expr_->lower_val_);
        upper_arg_.SetValue<ValueType>(expr_->upper_val_);
        arg_inited_ = true;
    }
    ValueType val1 = lower_arg_.GetValue<ValueType>();
    ValueType val2 = upper_arg_.GetValue<ValueType>();
    int index = -1;
    if (expr_->column_.nested_path_.size() > 0) {
        index = std::stoi(expr_->column_.nested_path_[0]);
    }

    size_t processed_cursor = 0;
    auto execute_sub_batch =
        [lower_inclusive, upper_inclusive, &processed_cursor, &bitmap_input]
        <FilterType filter_type = FilterType::sequential>(
            const milvus::ArrayView* data,
            const bool* valid_data,
            const int32_t* offsets,
            const int size,
            TargetBitmapView res,
            TargetBitmapView valid_res,
            ValueType val1,
            ValueType val2,
            int index) {
        if (lower_inclusive && upper_inclusive) {
            BinaryRangeElementFuncForArray<ValueType, true, true, filter_type>
                func;
            func(val1, val2, index, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        } else if (lower_inclusive && !upper_inclusive) {
            BinaryRangeElementFuncForArray<ValueType, true, false, filter_type>
                func;
            func(val1, val2, index, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        } else if (!lower_inclusive && upper_inclusive) {
            BinaryRangeElementFuncForArray<ValueType, false, true, filter_type>
                func;
            func(val1, val2, index, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        } else {
            BinaryRangeElementFuncForArray<ValueType, false, false, filter_type>
                func;
            func(val1, val2, index, data, valid_data, size, res, valid_res,
                 bitmap_input, processed_cursor, offsets);
        }
        processed_cursor += size;
    };

    int64_t processed_size;
    if (has_offset_input_) {
        processed_size = ProcessDataByOffsets<milvus::ArrayView>(
            execute_sub_batch, std::nullptr_t{}, input, res, valid_res, val1,
            val2, index);
    } else {
        processed_size = ProcessDataChunks<milvus::ArrayView>(
            execute_sub_batch, std::nullptr_t{}, res, valid_res, val1, val2,
            index);
    }
    AssertInfo(processed_size == real_batch_size,
               "internal error: expr processed rows {} not equal "
               "expect batch size {}",
               processed_size,
               real_batch_size);
    return res_vec;
}

}  // namespace exec
}  // namespace milvus