milvus/internal/core/src/exec/expression/Expr.h

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <memory>
#include <string>
#include <type_traits>

#include "common/FieldDataInterface.h"
#include "common/Json.h"
#include "common/OpContext.h"
#include "common/Types.h"
#include "exec/expression/EvalCtx.h"
#include "exec/expression/Utils.h"
#include "exec/QueryContext.h"
#include "expr/ITypeExpr.h"
#include "index/Index.h"
#include "index/JsonFlatIndex.h"
#include "log/Log.h"
#include "query/PlanProto.h"
#include "segcore/SegmentSealed.h"
#include "segcore/SegmentInterface.h"
#include "segcore/SegmentGrowingImpl.h"
namespace milvus {
namespace exec {

// Tag passed as a template argument to filter callbacks.
// Within this file the offset-driven paths (e.g. ProcessDataByOffsets,
// ProcessIndexChunksByOffsets) invoke their callback with FilterType::random.
// NOTE(review): `sequential` is presumably the tag for plain batch scans —
// confirm against the callback definitions.
enum class FilterType { sequential = 0, random = 1 };

// Pins the index structures of the field described by `field_meta`.
//
// JSON fields are resolved through SegmentInternalInterface::PinJsonIndex,
// using the JSON pointer built from `path` together with `data_type`,
// `any_type` and `is_array`. Every other field type goes through
// SegmentInternalInterface::PinIndex, which only needs the field id; the
// JSON-specific arguments are ignored in that case.
inline std::vector<PinWrapper<const index::IndexBase*>>
PinIndex(milvus::OpContext* op_ctx,
         const segcore::SegmentInternalInterface* segment,
         const FieldMeta& field_meta,
         const std::vector<std::string>& path = {},
         DataType data_type = DataType::NONE,
         bool any_type = false,
         bool is_array = false) {
    // Non-JSON fields: a plain per-field lookup is enough.
    if (field_meta.get_data_type() != DataType::JSON) {
        return segment->PinIndex(op_ctx, field_meta.get_id());
    }

    // JSON fields: the index is addressed by the nested path.
    auto json_pointer = milvus::Json::pointer(path);
    return segment->PinJsonIndex(op_ctx,
                                 field_meta.get_id(),
                                 json_pointer,
                                 data_type,
                                 any_type,
                                 is_array);
}

// Base class of every executable expression node.
//
// It stores the result data type, the child expressions, a display name and
// the operation context pointer. All virtual hooks have default
// implementations: Eval() and MoveCursor() do nothing, SupportOffsetInput()
// returns true, IsSource() returns false, and ToString() / GetColumnInfo()
// report ErrorCode::NotImplemented through ThrowInfo.
class Expr {
 public:
    // `inputs` is a const rvalue reference, so its contents can only be
    // copied into inputs_. Wrapping it in std::move would still select the
    // copy constructor, hence the explicit copy below.
    Expr(DataType type,
         const std::vector<std::shared_ptr<Expr>>&& inputs,
         const std::string& name,
         milvus::OpContext* op_ctx)
        : type_(type), inputs_(inputs), name_(name), op_ctx_(op_ctx) {
    }

    virtual ~Expr() = default;

    // Result data type of this expression.
    const DataType&
    type() const {
        return type_;
    }

    // Display name given at construction (returned by value).
    std::string
    name() const {
        return name_;
    }

    // Evaluates the expression; the base implementation is a no-op.
    virtual void
    Eval(EvalCtx& context, VectorPtr& result) {
    }

    // Only move cursor to next batch
    // but not do real eval for optimization
    virtual void
    MoveCursor() {
    }

    // Records whether evaluation is driven by an explicit offset input
    // instead of scanning the whole segment.
    void
    SetHasOffsetInput(bool has_offset_input) {
        has_offset_input_ = has_offset_input;
    }

    // Whether this expression can be evaluated on an offset input.
    virtual bool
    SupportOffsetInput() {
        return true;
    }

    // Textual form of the expression; not implemented in the base class.
    virtual std::string
    ToString() const {
        ThrowInfo(ErrorCode::NotImplemented, "not implemented");
    }

    // Source expressions override this to return true.
    virtual bool
    IsSource() const {
        return false;
    }

    // Column information of the expression; not implemented in the base class.
    virtual std::optional<milvus::expr::ColumnInfo>
    GetColumnInfo() const {
        ThrowInfo(ErrorCode::NotImplemented, "not implemented");
    }

    // Mutable access to the child expressions.
    std::vector<std::shared_ptr<Expr>>&
    GetInputsRef() {
        return inputs_;
    }

    // Predicate deciding, per chunk id, whether the chunk can be skipped.
    using SkipNamespaceFunc = std::function<bool(int64_t chunk_id)>;

    // Installs the namespace skip predicate (taken by value and moved in).
    virtual void
    SetNamespaceSkipFunc(SkipNamespaceFunc skip_namespace_func) {
        namespace_skip_func_ = std::move(skip_namespace_func);
    }

 protected:
    DataType type_;
    std::vector<std::shared_ptr<Expr>> inputs_;
    std::string name_;
    milvus::OpContext* op_ctx_;

    // whether we have offset input and do expr filtering on these data
    // default is false which means we will do expr filtering on the total segment data
    bool has_offset_input_ = false;
    // check if we can skip a chunk for namespace field.
    // if there's no namespace field, this is std::nullopt.
    // TODO: for expression like f1 > 1 and f2 > 2, we can use skip function of f1 when evaluating f2.
    std::optional<SkipNamespaceFunc> namespace_skip_func_;
};

using ExprPtr = std::shared_ptr<milvus::exec::Expr>;

/*
 * The expr has only one column.
 */
class SegmentExpr : public Expr {
 public:
    // Builds a single-column expression bound to `segment`.
    //
    // The Expr base is always initialised with DataType::BOOL as result type.
    //
    // input / name / op_ctx       forwarded to the Expr base.
    // segment                     segment to evaluate on; its constness is
    //                             cast away before it is stored in segment_.
    // field_id                    the column this expression reads.
    // nested_path, value_type,
    // allow_any_json_cast_type,
    // is_json_contains            stored and later handed to PinIndex() by
    //                             InitSegmentExpr().
    // active_count                stored in active_count_; the cursor helpers
    //                             use it as the upper bound of rows to cover.
    // batch_size                  rows per batch; asserted to be > 0.
    // consistency_level           stored in consistency_level_.
    SegmentExpr(const std::vector<ExprPtr>&& input,
                const std::string& name,
                milvus::OpContext* op_ctx,
                const segcore::SegmentInternalInterface* segment,
                const FieldId field_id,
                const std::vector<std::string> nested_path,
                const DataType value_type,
                int64_t active_count,
                int64_t batch_size,
                int32_t consistency_level,
                bool allow_any_json_cast_type = false,
                bool is_json_contains = false)
        : Expr(DataType::BOOL, std::move(input), name, op_ctx),
          segment_(const_cast<segcore::SegmentInternalInterface*>(segment)),
          field_id_(field_id),
          nested_path_(nested_path),
          value_type_(value_type),
          allow_any_json_cast_type_(allow_any_json_cast_type),
          active_count_(active_count),
          batch_size_(batch_size),
          consistency_level_(consistency_level),
          is_json_contains_(is_json_contains) {
        // Cache the per-chunk row capacity reported by the segment.
        size_per_chunk_ = segment_->size_per_chunk();
        // A non-positive batch size would make the cursor helpers stall.
        AssertInfo(
            batch_size_ > 0,
            fmt::format("expr batch size should greater than zero, but now: {}",
                        batch_size_));
        // Resolve field type, primary-key flag, pinned index and chunk counts.
        InitSegmentExpr();
    }

    // Resolves the per-field state used by the evaluation helpers:
    //   * field_type_, and is_pk_field_ / pk_type_ when the field is the
    //     schema's primary key with a primary-key data type;
    //   * pinned_index_ and, when at least one index is pinned,
    //     num_index_chunk_;
    //   * num_data_chunk_ when the segment holds raw data for the field.
    void
    InitSegmentExpr() {
        auto& schema = segment_->get_schema();
        auto& field_meta = schema[field_id_];
        field_type_ = field_meta.get_data_type();

        const bool is_primary_key =
            schema.get_primary_field_id().has_value() &&
            schema.get_primary_field_id().value() == field_id_ &&
            IsPrimaryKeyDataType(field_meta.get_data_type());
        if (is_primary_key) {
            is_pk_field_ = true;
            pk_type_ = field_meta.get_data_type();
        }

        pinned_index_ = PinIndex(op_ctx_,
                                 segment_,
                                 field_meta,
                                 nested_path_,
                                 value_type_,
                                 allow_any_json_cast_type_,
                                 is_json_contains_);
        if (!pinned_index_.empty()) {
            num_index_chunk_ = pinned_index_.size();
        }

        // if index not include raw data, also need load data
        if (!segment_->HasFieldData(field_id_)) {
            return;
        }
        if (segment_->is_chunked()) {
            num_data_chunk_ = segment_->num_chunk_data(field_id_);
        } else {
            num_data_chunk_ = upper_div(active_count_, size_per_chunk_);
        }
    }

    // Overrides Expr::IsSource(): a segment expression always reports itself
    // as a source.
    bool
    IsSource() const override {
        return true;
    }

    // Advances the data cursor by one batch on a chunked segment without
    // evaluating anything.
    //
    // Walks the chunks from the current position, consuming rows until
    // batch_size_ rows are covered. The cursor fields are only written once a
    // full batch has been consumed; if the remaining rows do not fill a
    // batch, the cursor is left as it was.
    void
    MoveCursorForDataMultipleChunk() {
        int64_t consumed = 0;
        for (size_t chunk = current_data_chunk_; chunk < num_data_chunk_;
             ++chunk) {
            // Only the chunk the cursor currently sits in starts mid-way.
            auto start =
                (chunk == current_data_chunk_) ? current_data_chunk_pos_ : 0;
            // if segment is chunked, type won't be growing
            int64_t take = segment_->chunk_size(field_id_, chunk) - start;
            take = std::min(take, batch_size_ - consumed);

            consumed += take;
            if (consumed < batch_size_) {
                continue;
            }

            // Batch complete: park the cursor right after the last row taken.
            current_data_chunk_ = chunk;
            current_data_chunk_pos_ = start + take;
            current_data_global_pos_ += consumed;
            return;
        }
    }
    // Advances the data cursor by one batch on a non-chunked segment without
    // evaluating anything.
    //
    // Sealed segments are treated as one chunk bounded by active_count_.
    // Other segment types are walked in size_per_chunk_-sized chunks, where
    // the last chunk may be partially filled; in that case the cursor fields
    // are only written once a full batch has been consumed.
    void
    MoveCursorForDataSingleChunk() {
        if (segment_->type() == SegmentType::Sealed) {
            auto step =
                std::min(active_count_ - current_data_chunk_pos_, batch_size_);
            current_data_chunk_pos_ += step;
            current_data_global_pos_ += step;
            return;
        }

        int64_t consumed = 0;
        for (size_t chunk = current_data_chunk_; chunk < num_data_chunk_;
             ++chunk) {
            // Only the chunk the cursor currently sits in starts mid-way.
            auto start =
                (chunk == current_data_chunk_) ? current_data_chunk_pos_ : 0;
            // The final chunk holds just the remainder unless the row count
            // is an exact multiple of the chunk capacity.
            const bool partial_tail = chunk == (num_data_chunk_ - 1) &&
                                      active_count_ % size_per_chunk_ != 0;
            auto take = partial_tail
                            ? active_count_ % size_per_chunk_ - start
                            : size_per_chunk_ - start;

            take = std::min(take, batch_size_ - consumed);

            consumed += take;
            if (consumed < batch_size_) {
                continue;
            }

            current_data_chunk_ = chunk;
            current_data_chunk_pos_ = start + take;
            current_data_global_pos_ += consumed;
            return;
        }
    }

    // Advances the data cursor by one batch, picking the chunked or the
    // non-chunked strategy according to the segment layout.
    void
    MoveCursorForData() {
        if (!segment_->is_chunked()) {
            MoveCursorForDataSingleChunk();
            return;
        }
        MoveCursorForDataMultipleChunk();
    }

    // Advances the index cursor by one batch, clamped so it never moves past
    // active_count_. Asserts that the segment is sealed.
    void
    MoveCursorForIndex() {
        AssertInfo(segment_->type() == SegmentType::Sealed,
                   "index mode only for sealed segment");
        const auto remaining = active_count_ - current_index_chunk_pos_;
        current_index_chunk_pos_ += std::min(remaining, batch_size_);
    }

    // Skips one batch without evaluating it.
    //
    // Nothing happens when an offset input is in use. Otherwise the index
    // cursor is advanced when SegmentExpr::CanUseIndex() holds, and the data
    // cursor is advanced whenever the index is not usable or the segment also
    // holds raw data for the field.
    void
    MoveCursor() override {
        // when we specify input, do not maintain states
        if (has_offset_input_) {
            return;
        }

        // CanUseIndex excludes ngram index and this is true even ngram index is used as ExecNgramMatch
        // uses data cursor.
        if (SegmentExpr::CanUseIndex()) {
            MoveCursorForIndex();
            if (!segment_->HasFieldData(field_id_)) {
                return;
            }
        }
        MoveCursorForData();
    }

    // Clears both `res` and `valid_res` at every position whose entry in
    // `valid_data` is false. A null `valid_data` leaves both bitmaps
    // untouched.
    void
    ApplyValidData(const bool* valid_data,
                   TargetBitmapView res,
                   TargetBitmapView valid_res,
                   const int size) {
        if (valid_data == nullptr) {
            return;
        }
        for (int idx = 0; idx < size; ++idx) {
            if (valid_data[idx]) {
                continue;
            }
            res[idx] = valid_res[idx] = false;
        }
    }

    int64_t
    GetNextBatchSize() {
        auto current_chunk = SegmentExpr::CanUseIndex() && use_index_
                                 ? current_index_chunk_
                                 : current_data_chunk_;
        auto current_chunk_pos = SegmentExpr::CanUseIndex() && use_index_
                                     ? current_index_chunk_pos_
                                     : current_data_chunk_pos_;
        auto current_rows = 0;
        if (segment_->is_chunked()) {
            current_rows =
                SegmentExpr::CanUseIndex() && use_index_ &&
                        segment_->type() == SegmentType::Sealed
                    ? current_chunk_pos
                    : segment_->num_rows_until_chunk(field_id_, current_chunk) +
                          current_chunk_pos;
        } else {
            current_rows = current_chunk * size_per_chunk_ + current_chunk_pos;
        }
        return current_rows + batch_size_ >= active_count_
                   ? active_count_ - current_rows
                   : batch_size_;
    }

    // Sequential raw-data scan over the single chunk of a sealed segment.
    // now only used for std::string_view && json
    // TODO: support more types
    //
    // Fetches views for up to batch_size_ rows starting at
    // current_data_chunk_pos_. Unless a skip predicate rejects chunk 0 the
    // rows are handed to `func`; when the chunk is skipped only the null rows
    // are cleared in `res` / `valid_res`. With NeedSegmentOffsets set, `func`
    // additionally receives the segment offset of every row. Advances
    // current_data_chunk_pos_ and returns the number of rows covered.
    template <typename T,
              bool NeedSegmentOffsets = false,
              typename FUNC,
              typename... ValTypes>
    int64_t
    ProcessChunkForSealedSeg(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        // For sealed segment, only single chunk
        Assert(num_data_chunk_ == 1);
        auto rows =
            std::min(active_count_ - current_data_chunk_pos_, batch_size_);
        if (rows == 0) {
            // do not go empty-loop at the bound of the chunk
            return 0;
        }

        auto& skip_index = segment_->GetSkipIndex();
        auto pw = segment_->get_batch_views<T>(
            op_ctx_, field_id_, 0, current_data_chunk_pos_, rows);
        // first is the raw data, second is valid_data
        // use valid_data to see if raw data is null
        auto [data_vec, valid_data] = pw.get();

        const bool scan_chunk =
            (!skip_func || !skip_func(skip_index, field_id_, 0)) &&
            (!namespace_skip_func_.has_value() ||
             !namespace_skip_func_.value()(0));

        if (!scan_chunk) {
            ApplyValidData(valid_data.data(), res, valid_res, rows);
        } else {
            if constexpr (NeedSegmentOffsets) {
                // For GIS functions: construct segment offsets array
                std::vector<int32_t> segment_offsets_array(rows);
                for (int64_t k = 0; k < rows; ++k) {
                    segment_offsets_array[k] =
                        static_cast<int32_t>(current_data_chunk_pos_ + k);
                }
                func(data_vec.data(),
                     valid_data.data(),
                     nullptr,
                     segment_offsets_array.data(),
                     rows,
                     res,
                     valid_res,
                     values...);
            } else {
                func(data_vec.data(),
                     valid_data.data(),
                     nullptr,
                     rows,
                     res,
                     valid_res,
                     values...);
            }
        }

        current_data_chunk_pos_ += rows;
        return rows;
    }

    // Offset-driven raw-data evaluation on the single chunk of a
    // non-chunked sealed segment.
    // stateless! Just check and set bitset as result, does not need to move cursor
    // now only used for std::string_view && json
    // TODO: support more types
    //
    // Fetches views for the rows listed in `input` and hands them to `func`
    // unless a skip predicate rejects chunk 0; in that case only null rows
    // are cleared in `res` / `valid_res`. Returns input->size().
    template <typename T, typename FUNC, typename... ValTypes>
    int64_t
    ProcessDataByOffsetsForSealedSeg(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        OffsetVector* input,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        // For non_chunked sealed segment, only single chunk
        Assert(num_data_chunk_ == 1);

        auto& skip_index = segment_->GetSkipIndex();
        auto pw =
            segment_->get_views_by_offsets<T>(op_ctx_, field_id_, 0, *input);
        auto [data_vec, valid_data] = pw.get();

        const bool scan_chunk =
            (!skip_func || !skip_func(skip_index, field_id_, 0)) &&
            (!namespace_skip_func_.has_value() ||
             !namespace_skip_func_.value()(0));

        if (!scan_chunk) {
            ApplyValidData(valid_data.data(), res, valid_res, input->size());
            return input->size();
        }

        func(data_vec.data(),
             valid_data.data(),
             nullptr,
             input->size(),
             res,
             valid_res,
             values...);
        return input->size();
    }

    // Evaluates `func` against the single scalar index for the rows listed
    // in `input` and wraps the outcome in a ColumnVector.
    //
    // The validity bitmap of the result is gathered from the index's
    // IsNotNull() bitmap at the given offsets. `func` is invoked with
    // FilterType::random and receives the index pointer, `values...` and the
    // raw offset array.
    //
    // Asserts that exactly one index chunk is pinned and that the pinned
    // index really is a ScalarIndex of the expected element type
    // (std::string for T = std::string_view, T otherwise).
    template <typename T, typename FUNC, typename... ValTypes>
    VectorPtr
    ProcessIndexChunksByOffsets(FUNC func,
                                OffsetVector* input,
                                ValTypes... values) {
        AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
        using IndexInnerType = std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
        using Index = index::ScalarIndex<IndexInnerType>;
        TargetBitmap valid_res(input->size());

        auto scalar_index = dynamic_cast<const Index*>(pinned_index_[0].get());
        // dynamic_cast yields nullptr on a type mismatch; fail loudly instead
        // of dereferencing a null pointer below.
        AssertInfo(scalar_index != nullptr,
                   "pinned index is not a scalar index of the expected type");
        auto* index_ptr = const_cast<Index*>(scalar_index);

        auto valid_result = index_ptr->IsNotNull();
        // size_t index: matches input->size() and avoids a signed/unsigned
        // comparison.
        for (size_t i = 0; i < input->size(); ++i) {
            valid_res[i] = valid_result[(*input)[i]];
        }
        auto result = std::move(func.template operator()<FilterType::random>(
            index_ptr, values..., input->data()));
        return std::make_shared<ColumnVector>(std::move(result),
                                              std::move(valid_res));
    }

    // when we have scalar index and index contains raw data, could go with index chunk by offsets
    //
    // For every offset in `input` the raw value is recovered from the scalar
    // index through Reverse_Lookup() and passed to `func` (invoked with
    // FilterType::random) one row at a time. When a skip predicate rejects
    // chunk 0, `res` and `valid_res` are both set from the index's
    // IsNotNull() bitmap instead. Returns input->size().
    //
    // Asserts that exactly one index chunk is pinned and that the pinned
    // index really is a ScalarIndex of the expected element type
    // (std::string for T = std::string_view, T otherwise).
    template <typename T, typename FUNC, typename... ValTypes>
    int64_t
    ProcessIndexLookupByOffsets(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        OffsetVector* input,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        AssertInfo(num_index_chunk_ == 1, "scalar index chunk num must be 1");
        auto& skip_index = segment_->GetSkipIndex();

        using IndexInnerType = std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
        using Index = index::ScalarIndex<IndexInnerType>;
        auto scalar_index = dynamic_cast<const Index*>(pinned_index_[0].get());
        // dynamic_cast yields nullptr on a type mismatch; fail loudly instead
        // of dereferencing a null pointer below.
        AssertInfo(scalar_index != nullptr,
                   "pinned index is not a scalar index of the expected type");
        auto* index_ptr = const_cast<Index*>(scalar_index);
        auto valid_result = index_ptr->IsNotNull();
        auto batch_size = input->size();

        if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
            (!namespace_skip_func_.has_value() ||
             !namespace_skip_func_.value()(0))) {
            // size_t index: same type family as batch_size, no signed/unsigned
            // comparison.
            for (size_t i = 0; i < batch_size; ++i) {
                auto offset = (*input)[i];
                auto raw = index_ptr->Reverse_Lookup(offset);
                if (!raw.has_value()) {
                    // NOTE(review): valid_res[i] is left untouched here —
                    // confirm that is intended for rows without a raw value.
                    res[i] = false;
                    continue;
                }
                // `raw` stays alive for this iteration, so a view-typed T may
                // refer to its storage.
                T raw_data = raw.value();
                bool valid_data = valid_result[offset];
                func.template operator()<FilterType::random>(&raw_data,
                                                             &valid_data,
                                                             nullptr,
                                                             1,
                                                             res + i,
                                                             valid_res + i,
                                                             values...);
            }
        } else {
            for (size_t i = 0; i < batch_size; ++i) {
                auto offset = (*input)[i];
                res[i] = valid_res[i] = valid_result[offset];
            }
        }

        return batch_size;
    }

    // accept offsets array and process on the scalar data by offsets
    // stateless! Just check and set bitset as result, does not need to move cursor
    //
    // Dispatch:
    //   * index usable and no raw data chunks -> ProcessIndexLookupByOffsets;
    //   * sealed + chunked      -> one row at a time, locating the chunk of
    //                              each offset via get_chunk_by_offset();
    //   * sealed + non-chunked  -> whole offset array against chunk 0;
    //   * otherwise (growing)   -> one row at a time, chunk derived from
    //                              size_per_chunk_.
    // In every path `func` is skipped for a chunk rejected by `skip_func` or
    // the namespace skip predicate; then only null rows are cleared in
    // `res` / `valid_res`. Returns input->size().
    template <typename T, typename FUNC, typename... ValTypes>
    int64_t
    ProcessDataByOffsets(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        OffsetVector* input,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        // Position in res / valid_res of the row currently being processed.
        int64_t processed_size = 0;

        // index reverse lookup
        if (SegmentExpr::CanUseIndex() && num_data_chunk_ == 0) {
            return ProcessIndexLookupByOffsets<T>(
                func, skip_func, input, res, valid_res, values...);
        }

        auto& skip_index = segment_->GetSkipIndex();

        // raw data scan
        // sealed segment
        if (segment_->type() == SegmentType::Sealed) {
            if (segment_->is_chunked()) {
                if constexpr (std::is_same_v<T, std::string_view> ||
                              std::is_same_v<T, Json> ||
                              std::is_same_v<T, ArrayView>) {
                    // These types are read through per-offset views.
                    for (size_t i = 0; i < input->size(); ++i) {
                        int64_t offset = (*input)[i];
                        auto [chunk_id, chunk_offset] =
                            segment_->get_chunk_by_offset(field_id_, offset);
                        // Views for exactly one row: data_vec / valid_data
                        // describe this row at index 0.
                        auto pw = segment_->get_views_by_offsets<T>(
                            op_ctx_,
                            field_id_,
                            chunk_id,
                            {int32_t(chunk_offset)});
                        auto [data_vec, valid_data] = pw.get();
                        if ((!skip_func ||
                             !skip_func(skip_index, field_id_, chunk_id)) &&
                            (!namespace_skip_func_.has_value() ||
                             !namespace_skip_func_.value()(chunk_id))) {
                            func.template operator()<FilterType::random>(
                                data_vec.data(),
                                valid_data.data(),
                                nullptr,
                                1,
                                res + processed_size,
                                valid_res + processed_size,
                                values...);
                        } else {
                            // valid_data covers only the single row fetched
                            // above, so its flag lives at index 0. Indexing
                            // it with the output position would ignore the
                            // null flag of every row after the first.
                            if (valid_data.size() > 0 && !valid_data[0]) {
                                res[processed_size] =
                                    valid_res[processed_size] = false;
                            }
                        }
                        processed_size++;
                    }
                    return input->size();
                }
                for (size_t i = 0; i < input->size(); ++i) {
                    int64_t offset = (*input)[i];
                    auto [chunk_id, chunk_offset] =
                        segment_->get_chunk_by_offset(field_id_, offset);
                    auto pw =
                        segment_->chunk_data<T>(op_ctx_, field_id_, chunk_id);
                    auto chunk = pw.get();
                    const T* data = chunk.data() + chunk_offset;
                    const bool* valid_data = chunk.valid_data();
                    if (valid_data != nullptr) {
                        valid_data += chunk_offset;
                    }
                    if ((!skip_func ||
                         !skip_func(skip_index, field_id_, chunk_id)) &&
                        (!namespace_skip_func_.has_value() ||
                         !namespace_skip_func_.value()(chunk_id))) {
                        func.template operator()<FilterType::random>(
                            data,
                            valid_data,
                            nullptr,
                            1,
                            res + processed_size,
                            valid_res + processed_size,
                            values...);
                    } else {
                        ApplyValidData(valid_data,
                                       res + processed_size,
                                       valid_res + processed_size,
                                       1);
                    }
                    processed_size++;
                }
                return input->size();
            } else {
                if constexpr (std::is_same_v<T, std::string_view> ||
                              std::is_same_v<T, Json> ||
                              std::is_same_v<T, ArrayView>) {
                    return ProcessDataByOffsetsForSealedSeg<T>(
                        func, skip_func, input, res, valid_res, values...);
                }
                // Single chunk: hand the whole offset array to func at once.
                auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, 0);
                auto chunk = pw.get();
                const T* data = chunk.data();
                const bool* valid_data = chunk.valid_data();
                if ((!skip_func || !skip_func(skip_index, field_id_, 0)) &&
                    (!namespace_skip_func_.has_value() ||
                     !namespace_skip_func_.value()(0))) {
                    func.template operator()<FilterType::random>(data,
                                                                 valid_data,
                                                                 input->data(),
                                                                 input->size(),
                                                                 res,
                                                                 valid_res,
                                                                 values...);
                } else {
                    ApplyValidData(valid_data, res, valid_res, input->size());
                }
                return input->size();
            }
        } else {
            // growing segment
            for (size_t i = 0; i < input->size(); ++i) {
                int64_t offset = (*input)[i];
                auto chunk_id = offset / size_per_chunk_;
                auto chunk_offset = offset % size_per_chunk_;
                auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, chunk_id);
                auto chunk = pw.get();
                const T* data = chunk.data() + chunk_offset;
                const bool* valid_data = chunk.valid_data();
                if (valid_data != nullptr) {
                    valid_data += chunk_offset;
                }
                if ((!skip_func ||
                     !skip_func(skip_index, field_id_, chunk_id)) &&
                    (!namespace_skip_func_.has_value() ||
                     !namespace_skip_func_.value()(chunk_id))) {
                    func.template operator()<FilterType::random>(
                        data,
                        valid_data,
                        nullptr,
                        1,
                        res + processed_size,
                        valid_res + processed_size,
                        values...);
                } else {
                    ApplyValidData(valid_data,
                                   res + processed_size,
                                   valid_res + processed_size,
                                   1);
                }
                processed_size++;
            }
        }
        return input->size();
    }

    // Sequential raw-data evaluation of one batch on a non-chunked segment.
    //
    // Template parameter to control whether segment offsets are needed (for GIS functions):
    // with NeedSegmentOffsets, `func` additionally receives an array holding
    // the segment offset of every row it is given.
    //
    // For std::string_view / Json on a sealed segment the work is delegated
    // to ProcessChunkForSealedSeg. Otherwise the chunks are walked from the
    // data cursor; each slice is passed to `func` unless a skip predicate
    // rejects the chunk, in which case only null rows are cleared in
    // `res` / `valid_res`. Results are written at offset processed_size.
    //
    // The data cursor is only updated once a full batch_size_ has been
    // processed. Returns the number of rows processed.
    template <typename T,
              bool NeedSegmentOffsets = false,
              typename FUNC,
              typename... ValTypes>
    int64_t
    ProcessDataChunksForSingleChunk(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        int64_t processed_size = 0;
        if constexpr (std::is_same_v<T, std::string_view> ||
                      std::is_same_v<T, Json>) {
            if (segment_->type() == SegmentType::Sealed) {
                return ProcessChunkForSealedSeg<T, NeedSegmentOffsets>(
                    func, skip_func, res, valid_res, values...);
            }
        }

        for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) {
            // Only the chunk the cursor currently sits in starts mid-way.
            auto data_pos =
                (i == current_data_chunk_) ? current_data_chunk_pos_ : 0;
            // Rows left in this chunk:
            //   last chunk, growing : the remainder of active_count_ (or a
            //                         full chunk when it divides evenly);
            //   last chunk, other   : everything up to active_count_;
            //   any earlier chunk   : a full size_per_chunk_.
            auto size =
                (i == (num_data_chunk_ - 1))
                    ? (segment_->type() == SegmentType::Growing
                           ? (active_count_ % size_per_chunk_ == 0
                                  ? size_per_chunk_ - data_pos
                                  : active_count_ % size_per_chunk_ - data_pos)
                           : active_count_ - data_pos)
                    : size_per_chunk_ - data_pos;

            // Never take more than what is still missing from the batch.
            size = std::min(size, batch_size_ - processed_size);
            if (size == 0)
                continue;  // do not go empty-loop at the bound of the chunk

            auto& skip_index = segment_->GetSkipIndex();
            auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, i);
            auto chunk = pw.get();
            // valid_data may be null; when present it is shifted to the
            // first row of the slice.
            const bool* valid_data = chunk.valid_data();
            if (valid_data != nullptr) {
                valid_data += data_pos;
            }
            if ((!skip_func || !skip_func(skip_index, field_id_, i)) &&
                (!namespace_skip_func_.has_value() ||
                 !namespace_skip_func_.value()(i))) {
                const T* data = chunk.data() + data_pos;

                if constexpr (NeedSegmentOffsets) {
                    // For GIS functions: construct segment offsets array
                    std::vector<int32_t> segment_offsets_array(size);
                    for (int64_t j = 0; j < size; ++j) {
                        segment_offsets_array[j] = static_cast<int32_t>(
                            size_per_chunk_ * i + data_pos + j);
                    }
                    func(data,
                         valid_data,
                         nullptr,
                         segment_offsets_array.data(),
                         size,
                         res + processed_size,
                         valid_res + processed_size,
                         values...);
                } else {
                    func(data,
                         valid_data,
                         nullptr,
                         size,
                         res + processed_size,
                         valid_res + processed_size,
                         values...);
                }
            } else {
                // Chunk skipped: only propagate nulls into the result.
                ApplyValidData(valid_data,
                               res + processed_size,
                               valid_res + processed_size,
                               size);
            }

            processed_size += size;
            // Batch complete: park the cursor right after the last row taken.
            if (processed_size >= batch_size_) {
                current_data_chunk_ = i;
                current_data_chunk_pos_ = data_pos + size;
                break;
            }
        }

        return processed_size;
    }

    // Evaluates `func` over the raw data of a chunked segment, chunk by chunk.
    //
    // Template parameters:
    //   T                  - element type handed to `func`.
    //   NeedSegmentOffsets - when true, `func` additionally receives an array
    //                        with the segment-level row offset of every row in
    //                        the slice being evaluated.
    //
    // Parameters:
    //   func               - evaluator called once per non-skipped chunk slice.
    //   skip_func          - optional predicate; when it (or namespace_skip_func_)
    //                        returns true for a chunk, `func` is not run and only
    //                        the validity data is applied through ApplyValidData.
    //   res / valid_res    - output views; each slice is written at the offset
    //                        equal to the number of rows processed so far.
    //   process_all_chunks - if true, every chunk is processed from its first
    //                        row, batch_size_ is ignored and no inner state is
    //                        changed. If false, processing starts at the data
    //                        cursor (current_data_chunk_ /
    //                        current_data_chunk_pos_), is capped at batch_size_
    //                        rows, and the cursor is advanced once batch_size_
    //                        rows have been produced.
    //   values             - extra arguments forwarded to `func`.
    //
    // Returns the number of rows processed.
    template <typename T,
              bool NeedSegmentOffsets = false,
              typename FUNC,
              typename... ValTypes>
    int64_t
    ProcessMultipleChunksCommon(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        bool process_all_chunks,
        ValTypes... values) {
        int64_t processed_size = 0;

        size_t start_chunk = process_all_chunks ? 0 : current_data_chunk_;

        for (size_t i = start_chunk; i < num_data_chunk_; i++) {
            auto data_pos =
                process_all_chunks
                    ? 0
                    : (i == current_data_chunk_ ? current_data_chunk_pos_ : 0);

            // if segment is chunked, type won't be growing
            int64_t size = segment_->chunk_size(field_id_, i) - data_pos;
            // process a whole chunk if process_all_chunks is true
            if (!process_all_chunks) {
                size = std::min(size, batch_size_ - processed_size);
            }

            if (size == 0)
                continue;  //do not go empty-loop at the bound of the chunk

            auto& skip_index = segment_->GetSkipIndex();
            if ((!skip_func || !skip_func(skip_index, field_id_, i)) &&
                (!namespace_skip_func_.has_value() ||
                 !namespace_skip_func_.value()(i))) {
                // Segment offsets are only consumed when NeedSegmentOffsets is
                // set, so the array is built only in that instantiation and
                // only for chunks that are actually evaluated.
                std::vector<int32_t> segment_offsets_array;
                if constexpr (NeedSegmentOffsets) {
                    auto start_offset =
                        segment_->num_rows_until_chunk(field_id_, i) + data_pos;
                    segment_offsets_array.resize(size);
                    for (int64_t j = 0; j < size; ++j) {
                        int64_t offset = start_offset + j;
                        segment_offsets_array[j] = static_cast<int32_t>(offset);
                    }
                }

                bool is_seal = false;
                if constexpr (std::is_same_v<T, std::string_view> ||
                              std::is_same_v<T, Json> ||
                              std::is_same_v<T, ArrayView>) {
                    if (segment_->type() == SegmentType::Sealed) {
                        // first is the raw data, second is valid_data
                        // use valid_data to see if raw data is null
                        auto pw = segment_->get_batch_views<T>(
                            op_ctx_, field_id_, i, data_pos, size);
                        // Bind to the pinned views instead of copying them;
                        // `pw` keeps them alive for the rest of this scope.
                        auto&& [data_vec, valid_data] = pw.get();

                        if constexpr (NeedSegmentOffsets) {
                            func(data_vec.data(),
                                 valid_data.data(),
                                 nullptr,
                                 segment_offsets_array.data(),
                                 size,
                                 res + processed_size,
                                 valid_res + processed_size,
                                 values...);
                        } else {
                            func(data_vec.data(),
                                 valid_data.data(),
                                 nullptr,
                                 size,
                                 res + processed_size,
                                 valid_res + processed_size,
                                 values...);
                        }

                        is_seal = true;
                    }
                }
                if (!is_seal) {
                    auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, i);
                    auto chunk = pw.get();
                    const T* data = chunk.data() + data_pos;
                    const bool* valid_data = chunk.valid_data();
                    if (valid_data != nullptr) {
                        valid_data += data_pos;
                    }

                    if constexpr (NeedSegmentOffsets) {
                        // For GIS functions: pass the segment offsets array
                        func(data,
                             valid_data,
                             nullptr,
                             segment_offsets_array.data(),
                             size,
                             res + processed_size,
                             valid_res + processed_size,
                             values...);
                    } else {
                        func(data,
                             valid_data,
                             nullptr,
                             size,
                             res + processed_size,
                             valid_res + processed_size,
                             values...);
                    }
                }
            } else {
                // Chunk skipped: only propagate validity into the outputs.
                const bool* valid_data;
                if constexpr (std::is_same_v<T, std::string_view> ||
                              std::is_same_v<T, Json> ||
                              std::is_same_v<T, ArrayView>) {
                    auto pw = segment_->get_batch_views<T>(
                        op_ctx_, field_id_, i, data_pos, size);
                    valid_data = pw.get().second.data();
                    ApplyValidData(valid_data,
                                   res + processed_size,
                                   valid_res + processed_size,
                                   size);
                } else {
                    auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, i);
                    auto chunk = pw.get();
                    valid_data = chunk.valid_data();
                    if (valid_data != nullptr) {
                        valid_data += data_pos;
                    }
                    ApplyValidData(valid_data,
                                   res + processed_size,
                                   valid_res + processed_size,
                                   size);
                }
            }

            processed_size += size;

            if (!process_all_chunks && processed_size >= batch_size_) {
                current_data_chunk_ = i;
                current_data_chunk_pos_ = data_pos + size;
                break;
            }
        }

        return processed_size;
    }

    // Batch-mode evaluation for chunked segments: forwards to
    // ProcessMultipleChunksCommon with process_all_chunks disabled, so the
    // call is bounded by batch_size_ and starts at the current data cursor.
    // Returns the number of rows processed.
    template <typename T,
              bool NeedSegmentOffsets = false,
              typename FUNC,
              typename... ValTypes>
    int64_t
    ProcessDataChunksForMultipleChunk(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        constexpr bool kProcessAllChunks = false;
        const int64_t processed_rows =
            ProcessMultipleChunksCommon<T, NeedSegmentOffsets>(
                func, skip_func, res, valid_res, kProcessAllChunks, values...);
        return processed_rows;
    }

    // Whole-segment evaluation for chunked segments: forwards to
    // ProcessMultipleChunksCommon with process_all_chunks enabled, so every
    // chunk is visited and the data cursor is left untouched.
    // Returns the number of rows processed.
    template <typename T, typename FUNC, typename... ValTypes>
    int64_t
    ProcessAllChunksForMultipleChunk(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        constexpr bool kProcessAllChunks = true;
        const int64_t processed_rows = ProcessMultipleChunksCommon<T>(
            func, skip_func, res, valid_res, kProcessAllChunks, values...);
        return processed_rows;
    }

    // Evaluates `func` over the next batch of raw data, dispatching on the
    // segment layout: chunked segments use the multiple-chunk path, all
    // others use the single-chunk path. Returns the number of rows processed.
    template <typename T,
              bool NeedSegmentOffsets = false,
              typename FUNC,
              typename... ValTypes>
    int64_t
    ProcessDataChunks(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        if (!segment_->is_chunked()) {
            return ProcessDataChunksForSingleChunk<T, NeedSegmentOffsets>(
                func, skip_func, res, valid_res, values...);
        }
        return ProcessDataChunksForMultipleChunk<T, NeedSegmentOffsets>(
            func, skip_func, res, valid_res, values...);
    }

    // Evaluates `func` over every data chunk of a chunked segment.
    // Only chunked segments are supported; any other layout raises
    // ErrorCode::Unsupported. Returns the number of rows processed.
    template <typename T, typename FUNC, typename... ValTypes>
    int64_t
    ProcessAllDataChunk(
        FUNC func,
        std::function<bool(const milvus::SkipIndex&, FieldId, int)> skip_func,
        TargetBitmapView res,
        TargetBitmapView valid_res,
        ValTypes... values) {
        if (!segment_->is_chunked()) {
            ThrowInfo(ErrorCode::Unsupported, "unreachable");
        }
        return ProcessAllChunksForMultipleChunk<T>(
            func, skip_func, res, valid_res, values...);
    }

    // Appends one slice of a cached per-chunk index result to the batch
    // outputs.
    //
    // The slice starts at the index cursor position when `chunk_id` is the
    // cursor chunk, otherwise at 0. Its length is the smallest of: rows left
    // in the chunk (size_per_chunk_ - start), rows left in the batch
    // (batch_size_ - processed_rows), and the size of `chunk_res`.
    // Returns the number of rows appended.
    int
    ProcessIndexOneChunk(TargetBitmap& result,
                         TargetBitmap& valid_result,
                         size_t chunk_id,
                         const TargetBitmap& chunk_res,
                         const TargetBitmap& chunk_valid_res,
                         int processed_rows) {
        const int64_t start =
            (chunk_id == current_index_chunk_) ? current_index_chunk_pos_ : 0;

        int64_t count = size_per_chunk_ - start;
        count = std::min(count, batch_size_ - processed_rows);
        count = std::min(count, int64_t(chunk_res.size()));

        result.append(chunk_res, start, count);
        valid_result.append(chunk_valid_res, start, count);
        return count;
    }

    // Evaluates `func` against the scalar index of each index chunk and
    // assembles the next batch of results.
    //
    // For every chunk from the index cursor onwards, the full per-chunk
    // result of `func(index_ptr, values...)` and the index's IsNotNull()
    // bitmap are computed once and cached (keyed by cached_index_chunk_id_),
    // then sliced into the batch by ProcessIndexOneChunk. For JSON fields the
    // pinned index is either a JsonFlatIndex (queried through an executor
    // built for the path relative to the index's nested path) or is cast to
    // ScalarIndex directly.
    //
    // Once batch_size_ rows have been produced the index cursor
    // (current_index_chunk_ / current_index_chunk_pos_) is advanced.
    // Returns a ColumnVector holding the result and validity bitmaps.
    template <typename T, typename FUNC, typename... ValTypes>
    VectorPtr
    ProcessIndexChunks(FUNC func, ValTypes... values) {
        typedef std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
                IndexInnerType;
        using Index = index::ScalarIndex<IndexInnerType>;
        TargetBitmap result;
        TargetBitmap valid_result;
        int processed_rows = 0;

        for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
            // This cache result help getting result for every batch loop.
            // It avoids indexing execute for every batch because indexing
            // executing costs quite much time.
            if (cached_index_chunk_id_ != i) {
                Index* index_ptr = nullptr;
                PinWrapper<const index::IndexBase*> json_pw;
                PinWrapper<const Index*> pw;
                // Executor for JsonFlatIndex. Must outlive index_ptr. Only used for JSON type.
                std::shared_ptr<
                    index::JsonFlatIndexQueryExecutor<IndexInnerType>>
                    executor;

                if (field_type_ == DataType::JSON) {
                    auto pointer = milvus::Json::pointer(nested_path_);

                    json_pw = pinned_index_[i];
                    // check if it is a json flat index, if so, create a json flat index query executor
                    auto json_flat_index =
                        dynamic_cast<const index::JsonFlatIndex*>(
                            json_pw.get());

                    if (json_flat_index) {
                        auto index_path = json_flat_index->GetNestedPath();
                        executor =
                            json_flat_index
                                ->template create_executor<IndexInnerType>(
                                    pointer.substr(index_path.size()));
                        index_ptr = executor.get();
                    } else {
                        auto json_index =
                            const_cast<index::IndexBase*>(json_pw.get());
                        index_ptr = dynamic_cast<Index*>(json_index);
                    }
                } else {
                    auto scalar_index =
                        dynamic_cast<const Index*>(pinned_index_[i].get());
                    index_ptr = const_cast<Index*>(scalar_index);
                }
                cached_index_chunk_res_ = std::make_shared<TargetBitmap>(
                    std::move(func(index_ptr, values...)));
                // Named distinctly so it does not shadow the batch-level
                // `valid_result` declared above.
                auto chunk_valid_result = index_ptr->IsNotNull();
                cached_index_chunk_valid_res_ = std::make_shared<TargetBitmap>(
                    std::move(chunk_valid_result));
                cached_index_chunk_id_ = i;
            }

            auto size = ProcessIndexOneChunk(result,
                                             valid_result,
                                             i,
                                             *cached_index_chunk_res_,
                                             *cached_index_chunk_valid_res_,
                                             processed_rows);

            if (processed_rows + size >= batch_size_) {
                // Decide whether we are still inside the cursor chunk BEFORE
                // moving the cursor: when the batch ends in a later chunk the
                // position restarts at `size` instead of accumulating onto the
                // previous chunk's position.
                const bool same_chunk = (i == current_index_chunk_);
                current_index_chunk_pos_ =
                    same_chunk ? current_index_chunk_pos_ + size : size;
                current_index_chunk_ = i;
                break;
            }
            processed_rows += size;
        }

        return std::make_shared<ColumnVector>(std::move(result),
                                              std::move(valid_result));
    }

    // Returns the validity bitmap for the next batch.
    //
    // With use_index == false the bitmap is read from raw data chunks.
    // With use_index == true it is read from the scalar index; for ArrayView
    // fields the index is typed by the array's element type, so the call is
    // dispatched on the schema's element type. Unsupported element types
    // raise DataTypeInvalid.
    template <typename T>
    TargetBitmap
    ProcessChunksForValid(bool use_index) {
        if (!use_index) {
            return ProcessDataChunksForValid<T>();
        }

        // when T is ArrayView, the ScalarIndex<T> shall be ScalarIndex<ElementType>
        // NOT ScalarIndex<ArrayView>
        if (std::is_same_v<T, ArrayView>) {
            auto elem_type =
                segment_->get_schema()[field_id_].get_element_type();
            switch (elem_type) {
                case DataType::BOOL:
                    return ProcessIndexChunksForValid<bool>();
                case DataType::INT8:
                    return ProcessIndexChunksForValid<int8_t>();
                case DataType::INT16:
                    return ProcessIndexChunksForValid<int16_t>();
                case DataType::INT32:
                    return ProcessIndexChunksForValid<int32_t>();
                case DataType::INT64:
                    return ProcessIndexChunksForValid<int64_t>();
                case DataType::FLOAT:
                    return ProcessIndexChunksForValid<float>();
                case DataType::DOUBLE:
                    return ProcessIndexChunksForValid<double>();
                // All string-backed element types share the std::string index.
                case DataType::STRING:
                case DataType::VARCHAR:
                case DataType::GEOMETRY:
                    return ProcessIndexChunksForValid<std::string>();
                default:
                    ThrowInfo(DataTypeInvalid,
                              "unsupported element type: {}",
                              elem_type);
            }
        }
        return ProcessIndexChunksForValid<T>();
    }

    // Returns a validity bitmap with one bit per entry of `input`, where
    // each entry is a segment-level row offset. The bitmap starts all-true.
    //
    // use_index == true : bits are copied from IsNotNull() of the first
    //                     pinned index; ArrayView fields are re-dispatched on
    //                     the schema's element type first.
    // use_index == false: each offset is mapped to (chunk id, offset within
    //                     chunk) and the bit is read from that chunk's
    //                     validity data. The scan stops at the first chunk
    //                     without validity data, leaving remaining bits true.
    //
    // NOTE(review): unlike ProcessChunksForValid, the ArrayView dispatch here
    // has no GEOMETRY case - confirm this is intentional.
    template <typename T>
    TargetBitmap
    ProcessChunksForValidByOffsets(bool use_index, const OffsetVector& input) {
        using IndexInnerType = std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
        using Index = index::ScalarIndex<IndexInnerType>;
        const auto row_count = input.size();
        TargetBitmap valid_result(row_count);
        valid_result.set();

        if (!use_index) {
            for (size_t row = 0; row < row_count; ++row) {
                auto offset = input[row];

                // Locate the chunk holding `offset` and the position in it.
                int64_t chunk_id = 0;
                int64_t chunk_offset = offset;
                if (segment_->type() == SegmentType::Growing) {
                    chunk_id = offset / size_per_chunk_;
                    chunk_offset = offset % size_per_chunk_;
                } else if (segment_->is_chunked()) {
                    auto location =
                        segment_->get_chunk_by_offset(field_id_, offset);
                    chunk_id = location.first;
                    chunk_offset = location.second;
                }

                auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, chunk_id);
                auto chunk = pw.get();
                const bool* valid_data = chunk.valid_data();
                if (valid_data == nullptr) {
                    break;
                }
                valid_result[row] = valid_data[chunk_offset];
            }
            return valid_result;
        }

        // when T is ArrayView, the ScalarIndex<T> shall be ScalarIndex<ElementType>
        // NOT ScalarIndex<ArrayView>
        if (std::is_same_v<T, ArrayView>) {
            auto elem_type =
                segment_->get_schema()[field_id_].get_element_type();
            switch (elem_type) {
                case DataType::BOOL:
                    return ProcessChunksForValidByOffsets<bool>(use_index,
                                                                input);
                case DataType::INT8:
                    return ProcessChunksForValidByOffsets<int8_t>(use_index,
                                                                  input);
                case DataType::INT16:
                    return ProcessChunksForValidByOffsets<int16_t>(use_index,
                                                                   input);
                case DataType::INT32:
                    return ProcessChunksForValidByOffsets<int32_t>(use_index,
                                                                   input);
                case DataType::INT64:
                    return ProcessChunksForValidByOffsets<int64_t>(use_index,
                                                                   input);
                case DataType::FLOAT:
                    return ProcessChunksForValidByOffsets<float>(use_index,
                                                                 input);
                case DataType::DOUBLE:
                    return ProcessChunksForValidByOffsets<double>(use_index,
                                                                  input);
                case DataType::STRING:
                case DataType::VARCHAR:
                    return ProcessChunksForValidByOffsets<std::string>(
                        use_index, input);
                default:
                    ThrowInfo(DataTypeInvalid,
                              "unsupported element type: {}",
                              elem_type);
            }
        }

        auto typed_index = dynamic_cast<const Index*>(pinned_index_[0].get());
        auto* index_ptr = const_cast<Index*>(typed_index);
        const auto& not_null = index_ptr->IsNotNull();
        for (size_t row = 0; row < row_count; ++row) {
            valid_result[row] = not_null[input[row]];
        }
        return valid_result;
    }

    // Builds the validity bitmap for the next batch from raw data chunks.
    //
    // The bitmap has GetNextBatchSize() bits and starts all-true. Starting at
    // the data cursor, up to batch_size_ rows are visited chunk by chunk and
    // each chunk's validity data is applied through ApplyValidData. The
    // cursor (current_data_chunk_ / current_data_chunk_pos_) is advanced once
    // batch_size_ rows have been covered.
    //
    // NOTE(review): when a chunk has no validity data the function returns
    // immediately with the bitmap as built so far and does NOT advance the
    // cursor - confirm callers account for that.
    template <typename T>
    TargetBitmap
    ProcessDataChunksForValid() {
        TargetBitmap valid_result(GetNextBatchSize());
        valid_result.set();
        int64_t processed_size = 0;
        for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) {
            auto data_pos =
                (i == current_data_chunk_) ? current_data_chunk_pos_ : 0;
            int64_t size = 0;
            if (segment_->is_chunked()) {
                size = segment_->chunk_size(field_id_, i) - data_pos;
            } else {
                // Non-chunked layout: every chunk but the last holds
                // size_per_chunk_ rows; the last one holds the remainder
                // (growing) or all active rows (otherwise).
                size = (i == (num_data_chunk_ - 1))
                           ? (segment_->type() == SegmentType::Growing
                                  ? (active_count_ % size_per_chunk_ == 0
                                         ? size_per_chunk_ - data_pos
                                         : active_count_ % size_per_chunk_ -
                                               data_pos)
                                  : active_count_ - data_pos)
                           : size_per_chunk_ - data_pos;
            }

            size = std::min(size, batch_size_ - processed_size);
            if (size == 0)
                continue;  //do not go empty-loop at the bound of the chunk
            bool access_sealed_variable_column = false;
            if constexpr (std::is_same_v<T, std::string_view> ||
                          std::is_same_v<T, Json> ||
                          std::is_same_v<T, ArrayView>) {
                if (segment_->type() == SegmentType::Sealed) {
                    auto pw = segment_->get_batch_views<T>(
                        op_ctx_, field_id_, i, data_pos, size);
                    // Only the validity half of the views is needed; read it
                    // in place rather than copying both vectors out of `pw`.
                    const bool* valid_data = pw.get().second.data();
                    ApplyValidData(valid_data,
                                   valid_result + processed_size,
                                   valid_result + processed_size,
                                   size);
                    access_sealed_variable_column = true;
                }
            }

            if (!access_sealed_variable_column) {
                auto pw = segment_->chunk_data<T>(op_ctx_, field_id_, i);
                auto chunk = pw.get();
                const bool* valid_data = chunk.valid_data();
                if (valid_data == nullptr) {
                    return valid_result;
                }
                valid_data += data_pos;
                ApplyValidData(valid_data,
                               valid_result + processed_size,
                               valid_result + processed_size,
                               size);
            }

            processed_size += size;
            if (processed_size >= batch_size_) {
                current_data_chunk_ = i;
                current_data_chunk_pos_ = data_pos + size;
                break;
            }
        }
        return valid_result;
    }

    // Appends one slice of a cached per-chunk validity bitmap to
    // `valid_result` and returns the number of rows appended.
    //
    // The slice starts at the index cursor position when `chunk_id` is the
    // cursor chunk, otherwise at 0. Its length is normally the smallest of
    // the rows left in the chunk, the rows left in the batch and the size of
    // `chunk_valid_res`. For GEOMETRY fields on growing segments it is the
    // smaller of the rows left in the batch and the rows left in
    // `chunk_valid_res` after the start position.
    int
    ProcessIndexOneChunkForValid(TargetBitmap& valid_result,
                                 size_t chunk_id,
                                 const TargetBitmap& chunk_valid_res,
                                 int processed_rows) {
        const int64_t start =
            (chunk_id == current_index_chunk_) ? current_index_chunk_pos_ : 0;
        const int64_t batch_remaining = batch_size_ - processed_rows;
        const int64_t bitmap_rows = int64_t(chunk_valid_res.size());

        int64_t count = 0;
        if (field_type_ == DataType::GEOMETRY &&
            segment_->type() == SegmentType::Growing) {
            count = std::min(batch_remaining, bitmap_rows - start);
        } else {
            count = std::min(std::min(size_per_chunk_ - start, batch_remaining),
                             bitmap_rows);
        }

        valid_result.append(chunk_valid_res, start, count);
        return count;
    }

    // Builds the validity bitmap for the next batch from the scalar index.
    //
    // For every index chunk from the index cursor onwards, the chunk's
    // IsNotNull() bitmap is computed once and cached (keyed by
    // cached_index_chunk_id_), then sliced into the result by
    // ProcessIndexOneChunkForValid. Once batch_size_ rows have been produced
    // the index cursor (current_index_chunk_ / current_index_chunk_pos_) is
    // advanced.
    template <typename T>
    TargetBitmap
    ProcessIndexChunksForValid() {
        using IndexInnerType = std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
        using Index = index::ScalarIndex<IndexInnerType>;
        int processed_rows = 0;
        TargetBitmap valid_result;
        valid_result.set();

        for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
            // This cache result help getting result for every batch loop.
            // It avoids indexing execute for every batch because indexing
            // executing costs quite much time.
            if (cached_index_chunk_id_ != i) {
                auto scalar_index =
                    dynamic_cast<const Index*>(pinned_index_[i].get());
                auto* index_ptr = const_cast<Index*>(scalar_index);
                auto execute_sub_batch = [](Index* index_ptr) {
                    TargetBitmap res = index_ptr->IsNotNull();
                    return res;
                };
                cached_index_chunk_valid_res_ = std::make_shared<TargetBitmap>(
                    std::move(execute_sub_batch(index_ptr)));
                cached_index_chunk_id_ = i;
            }

            auto size =
                ProcessIndexOneChunkForValid(valid_result,
                                             i,
                                             *cached_index_chunk_valid_res_,
                                             processed_rows);

            if (processed_rows + size >= batch_size_) {
                // Decide whether we are still inside the cursor chunk BEFORE
                // moving the cursor: when the batch ends in a later chunk the
                // position restarts at `size` instead of accumulating onto the
                // previous chunk's position.
                const bool same_chunk = (i == current_index_chunk_);
                current_index_chunk_pos_ =
                    same_chunk ? current_index_chunk_pos_ + size : size;
                current_index_chunk_ = i;
                break;
            }
            processed_rows += size;
        }
        return valid_result;
    }

    template <typename T, typename FUNC, typename... ValTypes>
    void
    ProcessIndexChunksV2(FUNC func, ValTypes... values) {
        typedef std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
                IndexInnerType;
        using Index = index::ScalarIndex<IndexInnerType>;

        for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
            auto scalar_index =
                dynamic_cast<const Index*>(pinned_index_[i].get());
            auto* index_ptr = const_cast<Index*>(scalar_index);
            func(index_ptr, values...);
        }
    }

    // True when at least one index chunk exists and the expression is not
    // routed through the ngram index.
    bool
    CanUseIndex() const {
        if (num_index_chunk_ == 0) {
            return false;
        }
        // Ngram index should be used in specific execution path (CanUseNgramIndex -> ExecNgramMatch).
        // TODO: if multiple indexes are supported, this logic should be changed
        return !CanUseNgramIndex();
    }

    // Reports whether the index of the current index chunk can serve `op`.
    //
    // Non-string index types always return true. For string indexes, the
    // pattern operators (Match, InnerMatch, PostfixMatch) require either
    // regex support on the index or raw data stored in the index; every
    // other operator returns true.
    template <typename T>
    bool
    CanUseIndexForOp(OpType op) const {
        using IndexInnerType = std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
        if constexpr (!std::is_same_v<IndexInnerType, std::string>) {
            return true;
        }

        using Index = index::ScalarIndex<IndexInnerType>;
        const bool is_pattern_op = op == OpType::Match ||
                                   op == OpType::InnerMatch ||
                                   op == OpType::PostfixMatch;
        if (!is_pattern_op) {
            return true;
        }

        auto typed_index = dynamic_cast<const Index*>(
            pinned_index_[current_index_chunk_].get());
        auto* index_ptr = const_cast<Index*>(typed_index);
        // 1, index support regex query and try use it, then index handles the query;
        // 2, index has raw data, then call index.Reverse_Lookup to handle the query;
        const bool regex_capable =
            index_ptr->TryUseRegexQuery() && index_ptr->SupportRegexQuery();
        return regex_capable || index_ptr->HasRawData();
    }

    template <typename T>
    bool
    IndexHasRawData() const {
        typedef std::
            conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
                IndexInnerType;

        using Index = index::ScalarIndex<IndexInnerType>;
        for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
            auto scalar_index =
                dynamic_cast<const Index*>(pinned_index_[i].get());
            auto* index_ptr = const_cast<Index*>(scalar_index);
            if (!index_ptr->HasRawData()) {
                return false;
            }
        }

        return true;
    }

    void
    SetNotUseIndex() {
        use_index_ = false;
    }

    // Reads the expr_use_json_stats plan option reachable from `context`
    // (exec context -> query context -> plan options).
    bool
    PlanUseJsonStats(EvalCtx& context) const {
        const auto& exec_ctx = context.get_exec_context();
        const auto& query_ctx = exec_ctx->get_query_context();
        const auto& plan_options = query_ctx->get_plan_options();
        return plan_options.expr_use_json_stats;
    }

    // True when the segment is sealed and GetJsonStats yields a non-null
    // object for `field_id`. Non-sealed segments are never queried.
    bool
    HasJsonStats(FieldId field_id) const {
        if (segment_->type() != SegmentType::Sealed) {
            return false;
        }
        return segment_->GetJsonStats(op_ctx_, field_id).get() != nullptr;
    }

    // Decides whether JSON stats can be used for `nested_path` of
    // `field_id`. All of the following must hold, checked in this order:
    // the plan enables JSON stats, the segment has JSON stats for the field,
    // the path is non-empty, and no path component is an integer.
    bool
    CanUseJsonStats(EvalCtx& context,
                    FieldId field_id,
                    const std::vector<std::string>& nested_path) const {
        // if path contains integer, we can't use json stats such as "a.1.b", "a.1",
        // because we can't know the integer is a key or a array indice
        const auto has_integer_component =
            [](const std::vector<std::string>& path) {
                return std::any_of(path.begin(),
                                   path.end(),
                                   [](const std::string& component) {
                                       return milvus::IsInteger(component);
                                   });
            };

        if (!PlanUseJsonStats(context)) {
            return false;
        }
        if (!HasJsonStats(field_id)) {
            return false;
        }
        // if path is empty, json stats can not know key name,
        // so we can't use json shredding data
        if (nested_path.empty()) {
            return false;
        }
        return !has_integer_component(nested_path);
    }

    // Overridable hook reporting whether this expression goes through the
    // ngram index path. This implementation always answers false.
    virtual bool
    CanUseNgramIndex() const {
        constexpr bool kUsesNgramIndex = false;
        return kUsesNgramIndex;
    }

 protected:
    // Segment this expression is evaluated against.
    const segcore::SegmentInternalInterface* segment_;
    // Field whose data / index the expression reads.
    const FieldId field_id_;
    bool is_pk_field_{false};
    DataType pk_type_;
    // Upper bound on the number of rows produced per evaluation call.
    int64_t batch_size_;

    // JSON path components; converted with Json::pointer when querying JSON
    // indexes.
    std::vector<std::string> nested_path_;
    DataType field_type_;
    DataType value_type_;
    bool allow_any_json_cast_type_{false};
    bool is_json_contains_{false};
    bool is_data_mode_{false};
    // Sometimes the index must be skipped in favour of raw data.
    // Defaults to true, meaning the index is used whenever possible.
    bool use_index_{true};
    // Pinned indexes, addressed by index chunk id.
    std::vector<PinWrapper<const index::IndexBase*>> pinned_index_{};

    int64_t active_count_{0};
    int64_t num_data_chunk_{0};
    int64_t num_index_chunk_{0};
    // Cursor state: the position the expression has reached so far.
    // It is kept across calls because the expression may be evaluated once
    // per batch.
    int64_t current_data_chunk_{0};
    int64_t current_data_chunk_pos_{0};
    int64_t current_data_global_pos_{0};
    int64_t current_index_chunk_{0};
    int64_t current_index_chunk_pos_{0};
    int64_t size_per_chunk_{0};

    // Cache for index scans, so the index is not searched again on every
    // batch. cached_index_chunk_id_ is the chunk the cached bitmaps belong
    // to (-1 when nothing is cached).
    int64_t cached_index_chunk_id_{-1};
    std::shared_ptr<TargetBitmap> cached_index_chunk_res_{nullptr};
    // Cached validity (IsNotNull) bitmap of the same chunk.
    std::shared_ptr<TargetBitmap> cached_index_chunk_valid_res_{nullptr};

    // Cache for text match.
    std::shared_ptr<TargetBitmap> cached_match_res_{nullptr};
    int32_t consistency_level_{0};

    // Cache for ngram match.
    std::shared_ptr<TargetBitmap> cached_ngram_match_res_{nullptr};
};

// The functions below are declared here only; their definitions are not part
// of this section of the header.

// NOTE(review): presumably reports whether `expr` is a LIKE-style expression
// - confirm against the definition.
bool
IsLikeExpr(std::shared_ptr<Expr> expr);

// NOTE(review): presumably applies optimization passes to already compiled
// expressions - confirm against the definition.
void
OptimizeCompiledExprs(ExecContext* context, const std::vector<ExprPtr>& exprs);

// Compiles a list of logical (typed) expressions into executable
// expressions. `flatten_cadidates` defaults to an empty set and
// `enable_constant_folding` defaults to false.
std::vector<ExprPtr>
CompileExpressions(const std::vector<expr::TypedExprPtr>& logical_exprs,
                   ExecContext* context,
                   const std::unordered_set<std::string>& flatten_cadidates =
                       std::unordered_set<std::string>(),
                   bool enable_constant_folding = false);

// NOTE(review): presumably compiles the input sub-expressions of `expr` -
// confirm against the definition.
std::vector<ExprPtr>
CompileInputs(const expr::TypedExprPtr& expr,
              QueryContext* config,
              const std::unordered_set<std::string>& flatten_cadidates);

// Single-expression counterpart of CompileExpressions; takes a QueryContext
// rather than an ExecContext and has no default arguments.
ExprPtr
CompileExpression(const expr::TypedExprPtr& expr,
                  QueryContext* context,
                  const std::unordered_set<std::string>& flatten_cadidates,
                  bool enable_constant_folding);

// Owns the expressions compiled from a list of logical expressions, together
// with the (non-owned) execution context they were compiled for.
class ExprSet {
 public:
    // Compiles `logical_exprs` with CompileExpressions using `exec_ctx`.
    explicit ExprSet(const std::vector<expr::TypedExprPtr>& logical_exprs,
                     ExecContext* exec_ctx)
        : exprs_(CompileExpressions(logical_exprs, exec_ctx)),
          exec_ctx_(exec_ctx) {
    }

    virtual ~ExprSet() = default;

    // Evaluates every compiled expression, with initialization enabled.
    void
    Eval(EvalCtx& ctx, std::vector<VectorPtr>& results) {
        const bool initialize = true;
        Eval(0, exprs_.size(), initialize, ctx, results);
    }

    // Evaluates the expressions selected by `begin` and `end`; defined
    // outside this header section.
    virtual void
    Eval(int32_t begin,
         int32_t end,
         bool initialize,
         EvalCtx& ctx,
         std::vector<VectorPtr>& result);

    // Drops all compiled expressions.
    void
    Clear() {
        exprs_.clear();
    }

    // Execution context passed at construction.
    ExecContext*
    get_exec_context() const {
        return exec_ctx_;
    }

    // Number of compiled expressions currently held.
    size_t
    size() const {
        return exprs_.size();
    }

    // Read-only access to all compiled expressions.
    const std::vector<std::shared_ptr<Expr>>&
    exprs() const {
        return exprs_;
    }

    // Compiled expression at position `index` (no bounds check).
    const std::shared_ptr<Expr>&
    expr(int32_t index) const {
        return exprs_[index];
    }

 private:
    std::vector<std::shared_ptr<Expr>> exprs_;
    ExecContext* exec_ctx_;
};

}  //namespace exec
}  // namespace milvus