// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "BinaryRangeExpr.h" #include "query/Utils.h" namespace milvus { namespace exec { void PhyBinaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { switch (expr_->column_.data_type_) { case DataType::BOOL: { result = ExecRangeVisitorImpl(); break; } case DataType::INT8: { result = ExecRangeVisitorImpl(); break; } case DataType::INT16: { result = ExecRangeVisitorImpl(); break; } case DataType::INT32: { result = ExecRangeVisitorImpl(); break; } case DataType::INT64: { result = ExecRangeVisitorImpl(); break; } case DataType::FLOAT: { result = ExecRangeVisitorImpl(); break; } case DataType::DOUBLE: { result = ExecRangeVisitorImpl(); break; } case DataType::VARCHAR: { if (segment_->type() == SegmentType::Growing) { result = ExecRangeVisitorImpl(); } else { result = ExecRangeVisitorImpl(); } break; } case DataType::JSON: { auto value_type = expr_->lower_val_.val_case(); switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { result = ExecRangeVisitorImplForJson(); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { result = ExecRangeVisitorImplForJson(); break; } case proto::plan::GenericValue::ValCase::kStringVal: { result = ExecRangeVisitorImplForJson(); break; } default: { PanicInfo( DataTypeInvalid, fmt::format("unsupported value type {} in expression", value_type)); } } break; } case DataType::ARRAY: { auto value_type = expr_->lower_val_.val_case(); switch (value_type) { case proto::plan::GenericValue::ValCase::kInt64Val: { result = ExecRangeVisitorImplForArray(); break; } case proto::plan::GenericValue::ValCase::kFloatVal: { result = ExecRangeVisitorImplForArray(); break; } case proto::plan::GenericValue::ValCase::kStringVal: { result = ExecRangeVisitorImplForArray(); break; } default: { PanicInfo( DataTypeInvalid, fmt::format("unsupported value type {} in expression", value_type)); } } break; } default: PanicInfo(DataTypeInvalid, fmt::format("unsupported data type: {}", expr_->column_.data_type_)); } } template VectorPtr PhyBinaryRangeFilterExpr::ExecRangeVisitorImpl() { if (is_index_mode_) { return ExecRangeVisitorImplForIndex(); } else { return ExecRangeVisitorImplForData(); } } template ColumnVectorPtr PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1, HighPrecisionType& val2, bool& lower_inclusive, bool& upper_inclusive) { lower_inclusive = expr_->lower_inclusive_; upper_inclusive = expr_->upper_inclusive_; val1 = GetValueFromProto(expr_->lower_val_); val2 = GetValueFromProto(expr_->upper_val_); auto get_next_overflow_batch = [this]() -> ColumnVectorPtr { int64_t batch_size = overflow_check_pos_ + batch_size_ >= num_rows_ ? num_rows_ - overflow_check_pos_ : batch_size_; overflow_check_pos_ += batch_size; if (cached_overflow_res_ != nullptr && cached_overflow_res_->size() == batch_size) { return cached_overflow_res_; } auto res = std::make_shared(DataType::BOOL, batch_size); return res; }; if constexpr (std::is_integral_v && !std::is_same_v) { if (milvus::query::gt_ub(val1)) { return get_next_overflow_batch(); } else if (milvus::query::lt_lb(val1)) { val1 = std::numeric_limits::min(); lower_inclusive = true; } if (milvus::query::gt_ub(val2)) { val2 = std::numeric_limits::max(); upper_inclusive = true; } else if (milvus::query::lt_lb(val2)) { return get_next_overflow_batch(); } } return nullptr; } template VectorPtr PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForIndex() { typedef std:: conditional_t, std::string, T> IndexInnerType; using Index = index::ScalarIndex; typedef std::conditional_t && !std::is_same_v, int64_t, IndexInnerType> HighPrecisionType; HighPrecisionType val1; HighPrecisionType val2; bool lower_inclusive = false; bool upper_inclusive = false; if (auto res = PreCheckOverflow(val1, val2, lower_inclusive, upper_inclusive)) { return res; } auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } auto execute_sub_batch = [lower_inclusive, upper_inclusive]( Index* index_ptr, HighPrecisionType val1, HighPrecisionType val2) { BinaryRangeIndexFunc func; return std::move( func(index_ptr, val1, val2, lower_inclusive, upper_inclusive)); }; auto res = ProcessIndexChunks(execute_sub_batch, val1, val2); AssertInfo(res.size() == real_batch_size, fmt::format("internal error: expr processed rows {} not equal " "expect batch size {}", res.size(), real_batch_size)); return std::make_shared(std::move(res)); } template VectorPtr PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData() { typedef std:: conditional_t, std::string, T> IndexInnerType; using Index = index::ScalarIndex; typedef std::conditional_t && !std::is_same_v, int64_t, IndexInnerType> HighPrecisionType; auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } HighPrecisionType val1; HighPrecisionType val2; bool lower_inclusive = false; bool upper_inclusive = false; if (auto res = PreCheckOverflow(val1, val2, lower_inclusive, upper_inclusive)) { return res; } auto res_vec = std::make_shared(DataType::BOOL, real_batch_size); bool* res = (bool*)res_vec->GetRawData(); auto execute_sub_batch = [lower_inclusive, upper_inclusive]( const T* data, const int size, bool* res, HighPrecisionType val1, HighPrecisionType val2) { if (lower_inclusive && upper_inclusive) { BinaryRangeElementFunc func; func(val1, val2, data, size, res); } else if (lower_inclusive && !upper_inclusive) { BinaryRangeElementFunc func; func(val1, val2, data, size, res); } else if (!lower_inclusive && upper_inclusive) { BinaryRangeElementFunc func; func(val1, val2, data, size, res); } else { BinaryRangeElementFunc func; func(val1, val2, data, size, res); } }; auto skip_index_func = [val1, val2, lower_inclusive, upper_inclusive]( const SkipIndex& skip_index, FieldId field_id, int64_t chunk_id) { if (lower_inclusive && upper_inclusive) { return skip_index.CanSkipBinaryRange( field_id, chunk_id, val1, val2, true, true); } else if (lower_inclusive && !upper_inclusive) { return skip_index.CanSkipBinaryRange( field_id, chunk_id, val1, val2, true, false); } else if (!lower_inclusive && upper_inclusive) { return skip_index.CanSkipBinaryRange( field_id, chunk_id, val1, val2, false, true); } else { return skip_index.CanSkipBinaryRange( field_id, chunk_id, val1, val2, false, false); } }; int64_t processed_size = ProcessDataChunks( execute_sub_batch, skip_index_func, res, val1, val2); AssertInfo(processed_size == real_batch_size, fmt::format("internal error: expr processed rows {} not equal " "expect batch size {}", processed_size, real_batch_size)); return res_vec; } template VectorPtr PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson() { using GetType = std::conditional_t, std::string_view, ValueType>; auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } auto res_vec = std::make_shared(DataType::BOOL, real_batch_size); bool* res = (bool*)res_vec->GetRawData(); bool lower_inclusive = expr_->lower_inclusive_; bool upper_inclusive = expr_->upper_inclusive_; ValueType val1 = GetValueFromProto(expr_->lower_val_); ValueType val2 = GetValueFromProto(expr_->upper_val_); auto pointer = milvus::Json::pointer(expr_->column_.nested_path_); auto execute_sub_batch = [lower_inclusive, upper_inclusive, pointer]( const milvus::Json* data, const int size, bool* res, ValueType val1, ValueType val2) { if (lower_inclusive && upper_inclusive) { BinaryRangeElementFuncForJson func; func(val1, val2, pointer, data, size, res); } else if (lower_inclusive && !upper_inclusive) { BinaryRangeElementFuncForJson func; func(val1, val2, pointer, data, size, res); } else if (!lower_inclusive && upper_inclusive) { BinaryRangeElementFuncForJson func; func(val1, val2, pointer, data, size, res); } else { BinaryRangeElementFuncForJson func; func(val1, val2, pointer, data, size, res); } }; int64_t processed_size = ProcessDataChunks( execute_sub_batch, std::nullptr_t{}, res, val1, val2); AssertInfo(processed_size == real_batch_size, fmt::format("internal error: expr processed rows {} not equal " "expect batch size {}", processed_size, real_batch_size)); return res_vec; } template VectorPtr PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray() { using GetType = std::conditional_t, std::string_view, ValueType>; auto real_batch_size = GetNextBatchSize(); if (real_batch_size == 0) { return nullptr; } auto res_vec = std::make_shared(DataType::BOOL, real_batch_size); bool* res = (bool*)res_vec->GetRawData(); bool lower_inclusive = expr_->lower_inclusive_; bool upper_inclusive = expr_->upper_inclusive_; ValueType val1 = GetValueFromProto(expr_->lower_val_); ValueType val2 = GetValueFromProto(expr_->upper_val_); int index = -1; if (expr_->column_.nested_path_.size() > 0) { index = std::stoi(expr_->column_.nested_path_[0]); } auto execute_sub_batch = [lower_inclusive, upper_inclusive]( const milvus::ArrayView* data, const int size, bool* res, ValueType val1, ValueType val2, int index) { if (lower_inclusive && upper_inclusive) { BinaryRangeElementFuncForArray func; func(val1, val2, index, data, size, res); } else if (lower_inclusive && !upper_inclusive) { BinaryRangeElementFuncForArray func; func(val1, val2, index, data, size, res); } else if (!lower_inclusive && upper_inclusive) { BinaryRangeElementFuncForArray func; func(val1, val2, index, data, size, res); } else { BinaryRangeElementFuncForArray func; func(val1, val2, index, data, size, res); } }; int64_t processed_size = ProcessDataChunks( execute_sub_batch, std::nullptr_t{}, res, val1, val2, index); AssertInfo(processed_size == real_batch_size, fmt::format("internal error: expr processed rows {} not equal " "expect batch size {}", processed_size, real_batch_size)); return res_vec; } } //namespace exec } // namespace milvus