Reverse data from scalar index (#17145)
Signed-off-by: xige-16 <xi.ge@zilliz.com>
parent 3928da6493
commit 56778787be
@@ -24,6 +24,7 @@ endif ()

target_link_libraries(milvus_index
        milvus_proto
        milvus_exceptions
        knowhere
        ${PLATFORM_LIBS}
        )

@@ -38,6 +38,9 @@ class ScalarIndex : public IndexBase {
    virtual const TargetBitmapPtr
    Range(T lower_bound_value, bool lb_inclusive, T upper_bound_value, bool ub_inclusive) = 0;

    virtual T
    Reverse_Lookup(size_t offset) const = 0;

    const TargetBitmapPtr
    Query(const DatasetPtr& dataset) override;
};
|
||||
|
||||
@@ -42,6 +42,7 @@ template <typename T>
inline void
ScalarIndexSort<T>::Build(const size_t n, const T* values) {
    data_.reserve(n);
    idx_to_offsets_.resize(n);
    T* p = const_cast<T*>(values);
    for (size_t i = 0; i < n; ++i) {
        data_.emplace_back(IndexStructure(*p++, i));
@@ -59,15 +60,16 @@ ScalarIndexSort<T>::build() {
        throw std::invalid_argument("ScalarIndexSort cannot build null values!");
    }
    std::sort(data_.begin(), data_.end());
    for (size_t i = 0; i < data_.size(); ++i) {
        idx_to_offsets_[data_[i].idx_] = i;
    }
    is_built_ = true;
}

template <typename T>
inline BinarySet
ScalarIndexSort<T>::Serialize(const Config& config) {
    if (!is_built_) {
        build();
    }
    AssertInfo(is_built_, "index has not been built");

    auto index_data_size = data_.size() * sizeof(IndexStructure<T>);
    std::shared_ptr<uint8_t[]> index_data(new uint8_t[index_data_size]);
@@ -92,16 +94,18 @@ ScalarIndexSort<T>::Load(const BinarySet& index_binary) {

    auto index_data = index_binary.GetByName("index_data");
    data_.resize(index_size);
    idx_to_offsets_.resize(index_size);
    memcpy(data_.data(), index_data->data.get(), (size_t)index_data->size);
    for (size_t i = 0; i < data_.size(); ++i) {
        idx_to_offsets_[data_[i].idx_] = i;
    }
    is_built_ = true;
}

template <typename T>
inline const TargetBitmapPtr
ScalarIndexSort<T>::In(const size_t n, const T* values) {
    if (!is_built_) {
        build();
    }
    AssertInfo(is_built_, "index has not been built");
    TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(data_.size());
    for (size_t i = 0; i < n; ++i) {
        auto lb = std::lower_bound(data_.begin(), data_.end(), IndexStructure<T>(*(values + i)));
@@ -120,9 +124,7 @@ ScalarIndexSort<T>::In(const size_t n, const T* values) {
template <typename T>
inline const TargetBitmapPtr
ScalarIndexSort<T>::NotIn(const size_t n, const T* values) {
    if (!is_built_) {
        build();
    }
    AssertInfo(is_built_, "index has not been built");
    TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(data_.size());
    bitset->set();
    for (size_t i = 0; i < n; ++i) {
@@ -142,9 +144,7 @@ ScalarIndexSort<T>::NotIn(const size_t n, const T* values) {
template <typename T>
inline const TargetBitmapPtr
ScalarIndexSort<T>::Range(const T value, const OpType op) {
    if (!is_built_) {
        build();
    }
    AssertInfo(is_built_, "index has not been built");
    TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(data_.size());
    auto lb = data_.begin();
    auto ub = data_.end();
@@ -173,13 +173,11 @@ ScalarIndexSort<T>::Range(const T value, const OpType op) {
template <typename T>
inline const TargetBitmapPtr
ScalarIndexSort<T>::Range(T lower_bound_value, bool lb_inclusive, T upper_bound_value, bool ub_inclusive) {
    if (!is_built_) {
        build();
    }
    AssertInfo(is_built_, "index has not been built");
    TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(data_.size());
    if (lower_bound_value > upper_bound_value) {
        std::swap(lower_bound_value, upper_bound_value);
        std::swap(lb_inclusive, ub_inclusive);
    if (lower_bound_value > upper_bound_value ||
        (lower_bound_value == upper_bound_value && !(lb_inclusive && ub_inclusive))) {
        return bitset;
    }
    auto lb = data_.begin();
    auto ub = data_.end();
@@ -199,4 +197,14 @@ ScalarIndexSort<T>::Range(T lower_bound_value, bool lb_inclusive, T upper_bound_
    return bitset;
}

template <typename T>
inline T
ScalarIndexSort<T>::Reverse_Lookup(size_t idx) const {
    AssertInfo(idx < idx_to_offsets_.size(), "out of range of total count");
    AssertInfo(is_built_, "index has not been built");

    auto offset = idx_to_offsets_[idx];
    return data_[offset].a_;
}

}  // namespace milvus::scalar
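Editor's note: to make the reverse-lookup bookkeeping above easier to follow, here is a minimal standalone sketch (standard library only; ToySortedIndex and its members are illustrative stand-ins, not Milvus types). Values are kept sorted together with their original row index, and idx_to_offsets_ maps a row index back to its slot in the sorted array, so Reverse_Lookup(idx) returns the raw value without scanning.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Toy model of the sorted scalar index: data_ holds (value, original row idx)
// pairs sorted by value; idx_to_offsets_[idx] is that row's position in data_.
struct ToySortedIndex {
    struct Entry { int value; std::size_t idx; };
    std::vector<Entry> data_;
    std::vector<std::size_t> idx_to_offsets_;

    void Build(const std::vector<int>& values) {
        data_.clear();
        idx_to_offsets_.resize(values.size());
        for (std::size_t i = 0; i < values.size(); ++i) data_.push_back({values[i], i});
        std::sort(data_.begin(), data_.end(),
                  [](const Entry& a, const Entry& b) { return a.value < b.value; });
        for (std::size_t i = 0; i < data_.size(); ++i) idx_to_offsets_[data_[i].idx] = i;
    }

    // Reverse lookup: original row index -> raw value, in O(1).
    int Reverse_Lookup(std::size_t idx) const { return data_[idx_to_offsets_[idx]].value; }
};

int main() {
    ToySortedIndex index;
    index.Build({7, 3, 9, 3});
    assert(index.Reverse_Lookup(0) == 7);  // original row 0 held 7
    assert(index.Reverse_Lookup(2) == 9);  // original row 2 held 9
    return 0;
}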
|
||||
|
||||
@ -15,9 +15,9 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "index/IndexStructure.h"
|
||||
#include <string>
|
||||
#include "index/ScalarIndex.h"
|
||||
|
||||
namespace milvus::scalar {
|
||||
@ -60,6 +60,9 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
||||
const TargetBitmapPtr
|
||||
Range(T lower_bound_value, bool lb_inclusive, T upper_bound_value, bool ub_inclusive) override;
|
||||
|
||||
T
|
||||
Reverse_Lookup(size_t offset) const override;
|
||||
|
||||
public:
|
||||
const std::vector<IndexStructure<T>>&
|
||||
GetData() {
|
||||
@ -78,6 +81,7 @@ class ScalarIndexSort : public ScalarIndex<T> {
|
||||
|
||||
private:
|
||||
bool is_built_;
|
||||
std::vector<size_t> idx_to_offsets_; // used to retrieve.
|
||||
std::vector<IndexStructure<T>> data_;
|
||||
};
|
||||
|
||||
|
||||
@@ -9,10 +9,6 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#include "index/StringIndexMarisa.h"
#include "index/Utils.h"
#include "index/Index.h"

#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/uuid_generators.hpp>
@@ -20,7 +16,11 @@
#include <stdio.h>
#include <fcntl.h>
#include <knowhere/common/Utils.h>
#include "exceptions/EasyAssert.h"

#include "index/StringIndexMarisa.h"
#include "index/Utils.h"
#include "index/Index.h"
#include "common/Utils.h"

namespace milvus::scalar {

@@ -150,7 +150,35 @@ StringIndexMarisa::NotIn(size_t n, const std::string* values) {

const TargetBitmapPtr
StringIndexMarisa::Range(std::string value, OpType op) {
    throw std::runtime_error("todo: unsupported now");
    auto count = Count();
    TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(count);
    marisa::Agent agent;
    for (size_t offset = 0; offset < count; ++offset) {
        agent.set_query(str_ids_[offset]);
        trie_.reverse_lookup(agent);
        std::string raw_data(agent.key().ptr(), agent.key().length());
        bool set = false;
        switch (op) {
            case OpType::LessThan:
                set = raw_data.compare(value) < 0;
                break;
            case OpType::LessEqual:
                set = raw_data.compare(value) <= 0;
                break;
            case OpType::GreaterThan:
                set = raw_data.compare(value) > 0;
                break;
            case OpType::GreaterEqual:
                set = raw_data.compare(value) >= 0;
                break;
            default:
                throw std::invalid_argument(std::string("Invalid OperatorType: ") + std::to_string((int)op) + "!");
        }
        if (set) {
            bitset->set(offset);
        }
    }
    return bitset;
}

const TargetBitmapPtr
@@ -158,7 +186,33 @@ StringIndexMarisa::Range(std::string lower_bound_value,
                         bool lb_inclusive,
                         std::string upper_bound_value,
                         bool ub_inclusive) {
    throw std::runtime_error("todo: unsupported now");
    auto count = Count();
    TargetBitmapPtr bitset = std::make_unique<TargetBitmap>(count);
    if (lower_bound_value.compare(upper_bound_value) > 0 ||
        (lower_bound_value.compare(upper_bound_value) == 0 && !(lb_inclusive && ub_inclusive))) {
        return bitset;
    }
    marisa::Agent agent;
    for (size_t offset = 0; offset < count; ++offset) {
        agent.set_query(str_ids_[offset]);
        trie_.reverse_lookup(agent);
        std::string raw_data(agent.key().ptr(), agent.key().length());
        bool set = true;
        if (lb_inclusive) {
            set &= raw_data.compare(lower_bound_value) >= 0;
        } else {
            set &= raw_data.compare(lower_bound_value) > 0;
        }
        if (ub_inclusive) {
            set &= raw_data.compare(upper_bound_value) <= 0;
        } else {
            set &= raw_data.compare(upper_bound_value) < 0;
        }
        if (set) {
            bitset->set(offset);
        }
    }
    return bitset;
}

const TargetBitmapPtr
@@ -215,6 +269,15 @@ StringIndexMarisa::prefix_match(const std::string& prefix) {
    return ret;
}

std::string
StringIndexMarisa::Reverse_Lookup(size_t offset) const {
    AssertInfo(offset < str_ids_.size(), "out of range of total count");
    marisa::Agent agent;
    agent.set_query(str_ids_[offset]);
    trie_.reverse_lookup(agent);
    return std::string(agent.key().ptr(), agent.key().length());
}

#endif

}  // namespace milvus::scalar
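Editor's note: the lookup/reverse-lookup round trip used above can be exercised in isolation. The sketch below is hedged: it assumes the marisa-trie C++ API exactly as it appears in the code above, and the rows/str_ids sample data is made up. It shows how each row's string maps to a trie key id and how a Reverse_Lookup-style call turns that id back into the raw string.

#include <marisa.h>
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> rows = {"beta", "alpha", "beta"};

    // Build the trie from all row values (duplicates collapse to one key).
    marisa::Keyset keyset;
    for (const auto& s : rows) keyset.push_back(s.c_str());
    marisa::Trie trie;
    trie.build(keyset);

    // str_ids[row] <- trie key id of that row's string (forward lookup).
    std::vector<std::size_t> str_ids(rows.size());
    for (std::size_t i = 0; i < rows.size(); ++i) {
        marisa::Agent agent;
        agent.set_query(rows[i].c_str());
        trie.lookup(agent);
        str_ids[i] = agent.key().id();
    }

    // Reverse lookup: key id -> raw string, as StringIndexMarisa::Reverse_Lookup does.
    for (std::size_t offset = 0; offset < rows.size(); ++offset) {
        marisa::Agent agent;
        agent.set_query(str_ids[offset]);
        trie.reverse_lookup(agent);
        std::string raw(agent.key().ptr(), agent.key().length());
        assert(raw == rows[offset]);
    }
    return 0;
}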
|
||||
|
||||
@ -58,6 +58,9 @@ class StringIndexMarisa : public StringIndex {
|
||||
const TargetBitmapPtr
|
||||
PrefixMatch(std::string prefix) override;
|
||||
|
||||
std::string
|
||||
Reverse_Lookup(size_t offset) const override;
|
||||
|
||||
private:
|
||||
void
|
||||
fill_str_ids(size_t n, const std::string* values);
|
||||
|
||||
@ -29,6 +29,8 @@
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
using optype = proto::plan::OpType;
|
||||
|
||||
class ExprVisitor;
|
||||
|
||||
// Base of all Exprs
|
||||
|
||||
@@ -64,9 +64,9 @@ class ExecExprVisitor : public ExprVisitor {
    auto
    ExecRangeVisitorImpl(FieldId field_id, IndexFunc func, ElementFunc element_func) -> BitsetType;

    template <typename T, typename ElementFunc>
    template <typename T, typename IndexFunc, typename ElementFunc>
    auto
    ExecDataRangeVisitorImpl(FieldId field_id, ElementFunc element_func) -> BitsetType;
    ExecDataRangeVisitorImpl(FieldId field_id, IndexFunc index_func, ElementFunc element_func) -> BitsetType;

    template <typename T>
    auto

@@ -179,16 +179,25 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id, IndexFunc index_func, El
    return final_result;
}

template <typename T, typename ElementFunc>
template <typename T, typename IndexFunc, typename ElementFunc>
auto
ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, ElementFunc element_func) -> BitsetType {
ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, IndexFunc index_func, ElementFunc element_func)
    -> BitsetType {
    auto& schema = segment_.get_schema();
    auto& field_meta = schema[field_id];
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
    auto indexing_barrier = segment_.num_chunk_index(field_id);
    auto data_barrier = segment_.num_chunk_data(field_id);
    AssertInfo(std::max(data_barrier, indexing_barrier) == num_chunk,
               "max(data_barrier, index_barrier) not equal to num_chunk");
    std::deque<BitsetType> results;

    for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
    // for a growing segment, indexing_barrier is always less than data_barrier,
    // so a growing segment always executes the expr plan against raw data;
    // if a sealed segment has loaded raw data for this field, then index_barrier = 0 and data_barrier = 1,
    // and the sealed segment likewise executes the expr plan against raw data
    for (auto chunk_id = 0; chunk_id < data_barrier; ++chunk_id) {
        auto this_size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
        BitsetType result(this_size);
        auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
@@ -199,6 +208,20 @@ ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, ElementFunc element_
        AssertInfo(result.size() == this_size, "[ExecExprVisitor]Chunk result size not equal to expected size");
        results.emplace_back(std::move(result));
    }

    // if a sealed segment has loaded a scalar index for this field, then index_barrier = 1 and data_barrier = 0,
    // and the sealed segment executes the expr plan against the scalar index
    using Index = scalar::ScalarIndex<T>;
    for (auto chunk_id = data_barrier; chunk_id < indexing_barrier; ++chunk_id) {
        auto& indexing = segment_.chunk_scalar_index<T>(field_id, chunk_id);
        auto this_size = const_cast<Index*>(&indexing)->Count();
        BitsetType result(this_size);
        for (int offset = 0; offset < this_size; ++offset) {
            result[offset] = index_func(const_cast<Index*>(&indexing), offset);
        }
        results.emplace_back(std::move(result));
    }

    auto final_result = Assemble(results);
    AssertInfo(final_result.size() == row_count_, "[ExecExprVisitor]Final result size not equal to row count");
    return final_result;
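Editor's note: the barrier bookkeeping above decides, per chunk, whether a predicate is evaluated on raw data (element_func) or on a loaded scalar index via Reverse_Lookup (index_func). The toy program below mirrors that dispatch in plain standard C++; none of the names are Milvus types, and the even-value predicate is invented purely for illustration. It models a sealed segment whose only state for the field is a loaded index (data_barrier = 0, indexing_barrier = 1).

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // Sealed segment with a scalar index loaded for this field:
    // no raw-data chunks, exactly one index "chunk".
    const int64_t data_barrier = 0;
    const int64_t indexing_barrier = 1;
    const int64_t rows_per_chunk = 4;

    // Stand-ins for the raw column and for index->Reverse_Lookup(offset).
    const std::vector<int64_t> column = {5, 2, 8, 11};
    auto reverse_lookup = [&](int64_t offset) { return column[offset]; };

    // element_func evaluates the predicate on a raw value;
    // index_func first reverses the value out of the index.
    auto element_func = [](int64_t x) { return x % 2 == 0; };
    auto index_func = [&](int64_t offset) { return reverse_lookup(offset) % 2 == 0; };

    std::vector<bool> result;
    for (int64_t chunk_id = 0; chunk_id < data_barrier; ++chunk_id) {
        for (int64_t i = 0; i < rows_per_chunk; ++i) {
            result.push_back(element_func(column[i]));  // raw-data path (unused here)
        }
    }
    for (int64_t chunk_id = data_barrier; chunk_id < indexing_barrier; ++chunk_id) {
        for (int64_t offset = 0; offset < rows_per_chunk; ++offset) {
            result.push_back(index_func(offset));  // index path
        }
    }
    std::cout << std::count(result.begin(), result.end(), true) << " rows match\n";  // prints 2
    return 0;
}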
|
||||
@ -278,26 +301,46 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(BinaryArithOpEvalRa
|
||||
case OpType::Equal: {
|
||||
switch (arith_op) {
|
||||
case ArithOpType::Add: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x + right_operand) == val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x + right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x - right_operand) == val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x - right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x * right_operand) == val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x * right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x / right_operand) == val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x / right_operand) == val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return static_cast<T>(fmod(x, right_operand)) == val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) {
|
||||
return (static_cast<T>(fmod(x, right_operand)) == val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
@ -307,26 +350,46 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(BinaryArithOpEvalRa
|
||||
case OpType::NotEqual: {
|
||||
switch (arith_op) {
|
||||
case ArithOpType::Add: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x + right_operand) != val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x + right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Sub: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x - right_operand) != val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x - right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mul: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x * right_operand) != val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x * right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Div: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return (x / right_operand) != val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) { return ((x / right_operand) != val); };
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
case ArithOpType::Mod: {
|
||||
auto index_func = [val, right_operand](Index* index, size_t offset) {
|
||||
auto x = index->Reverse_Lookup(offset);
|
||||
return static_cast<T>(fmod(x, right_operand)) != val;
|
||||
};
|
||||
auto elem_func = [val, right_operand](T x) {
|
||||
return (static_cast<T>(fmod(x, right_operand)) != val);
|
||||
};
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, elem_func);
|
||||
return ExecDataRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
|
||||
}
|
||||
default: {
|
||||
PanicInfo("unsupported arithmetic operation");
|
||||
@ -351,11 +414,7 @@ ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) ->
|
||||
bool upper_inclusive = expr.upper_inclusive_;
|
||||
T val1 = expr.lower_value_;
|
||||
T val2 = expr.upper_value_;
|
||||
// TODO: disable check?
|
||||
if (val1 > val2 || (val1 == val2 && !(lower_inclusive && upper_inclusive))) {
|
||||
BitsetType res(row_count_, false);
|
||||
return res;
|
||||
}
|
||||
|
||||
auto index_func = [=](Index* index) { return index->Range(val1, lower_inclusive, val2, upper_inclusive); };
|
||||
if (lower_inclusive && upper_inclusive) {
|
||||
auto elem_func = [val1, val2](T x) { return (val1 <= x && x <= val2); };
|
||||
@ -524,48 +583,109 @@ ExecExprVisitor::ExecCompareExprDispatcher(CompareExpr& expr, Op op) -> BitsetTy
|
||||
auto size_per_chunk = segment_.size_per_chunk();
|
||||
auto num_chunk = upper_div(row_count_, size_per_chunk);
|
||||
std::deque<BitsetType> bitsets;
|
||||
|
||||
// check for sealed segment, load either raw field data or index
|
||||
auto left_indexing_barrier = segment_.num_chunk_index(expr.left_field_id_);
|
||||
auto left_data_barrier = segment_.num_chunk_data(expr.left_field_id_);
|
||||
AssertInfo(std::max(left_data_barrier, left_indexing_barrier) == num_chunk,
|
||||
"max(left_data_barrier, left_indexing_barrier) not equal to num_chunk");
|
||||
|
||||
auto right_indexing_barrier = segment_.num_chunk_index(expr.right_field_id_);
|
||||
auto right_data_barrier = segment_.num_chunk_data(expr.right_field_id_);
|
||||
AssertInfo(std::max(right_data_barrier, right_indexing_barrier) == num_chunk,
|
||||
"max(right_data_barrier, right_indexing_barrier) not equal to num_chunk");
|
||||
|
||||
for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
|
||||
auto size = chunk_id == num_chunk - 1 ? row_count_ - chunk_id * size_per_chunk : size_per_chunk;
|
||||
auto getChunkData = [&, chunk_id](DataType type, FieldId field_id) -> std::function<const number(int)> {
|
||||
auto getChunkData = [&, chunk_id](DataType type, FieldId field_id,
|
||||
int64_t data_barrier) -> std::function<const number(int)> {
|
||||
switch (type) {
|
||||
case DataType::BOOL: {
|
||||
auto chunk_data = segment_.chunk_data<bool>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<bool>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<bool>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::INT8: {
|
||||
auto chunk_data = segment_.chunk_data<int8_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<int8_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<int8_t>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::INT16: {
|
||||
auto chunk_data = segment_.chunk_data<int16_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<int16_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<int16_t>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::INT32: {
|
||||
auto chunk_data = segment_.chunk_data<int32_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<int32_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<int32_t>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::INT64: {
|
||||
auto chunk_data = segment_.chunk_data<int64_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<int64_t>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<int64_t>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::FLOAT: {
|
||||
auto chunk_data = segment_.chunk_data<float>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<float>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<float>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::DOUBLE: {
|
||||
auto chunk_data = segment_.chunk_data<double>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<double>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<double>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto chunk_data = segment_.chunk_data<std::string>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
if (chunk_id < data_barrier) {
|
||||
auto chunk_data = segment_.chunk_data<std::string>(field_id, chunk_id).data();
|
||||
return [chunk_data](int i) -> const number { return chunk_data[i]; };
|
||||
} else {
|
||||
// for the case where a sealed segment has loaded an index for this scalar field instead of raw data
|
||||
auto& indexing = segment_.chunk_scalar_index<std::string>(field_id, chunk_id);
|
||||
return [&indexing](int i) -> const number { return indexing.Reverse_Lookup(i); };
|
||||
}
|
||||
}
|
||||
default:
|
||||
PanicInfo("unsupported datatype");
|
||||
}
|
||||
};
|
||||
auto left = getChunkData(expr.left_data_type_, expr.left_field_id_);
|
||||
auto right = getChunkData(expr.right_data_type_, expr.right_field_id_);
|
||||
auto left = getChunkData(expr.left_data_type_, expr.left_field_id_, left_data_barrier);
|
||||
auto right = getChunkData(expr.right_data_type_, expr.right_field_id_, right_data_barrier);
|
||||
|
||||
BitsetType bitset(size);
|
||||
for (int i = 0; i < size; ++i) {
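Editor's note: in the getChunkData change above, the accessor returned for each field now depends on whether the chunk falls before that field's data barrier (raw data loaded) or not (only a scalar index loaded). The standalone sketch below shows the pattern; ToyIndex, make_accessor, and the sample values are hypothetical stand-ins for scalar::ScalarIndex<T> and the segment accessors, not Milvus code.

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Minimal stand-in for a scalar index that can reverse a value by offset.
struct ToyIndex {
    std::vector<int64_t> values;
    int64_t Reverse_Lookup(int i) const { return values[i]; }
};

// Factory returning either a raw-data reader or an index-backed reader,
// selected by comparing chunk_id against data_barrier (as getChunkData does).
std::function<int64_t(int)>
make_accessor(int64_t chunk_id, int64_t data_barrier,
              const int64_t* chunk_data, const ToyIndex& indexing) {
    if (chunk_id < data_barrier) {
        return [chunk_data](int i) { return chunk_data[i]; };            // raw-data path
    }
    return [&indexing](int i) { return indexing.Reverse_Lookup(i); };    // index path
}

int main() {
    std::vector<int64_t> raw = {1, 2, 3};
    ToyIndex index{{10, 20, 30}};
    auto from_raw = make_accessor(/*chunk_id=*/0, /*data_barrier=*/1, raw.data(), index);
    auto from_index = make_accessor(/*chunk_id=*/0, /*data_barrier=*/0, raw.data(), index);
    std::cout << from_raw(1) << " " << from_index(1) << "\n";  // prints "2 20"
    return 0;
}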
|
||||
|
||||
@ -97,6 +97,13 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
return indexing_record_.get_finished_ack();
|
||||
}
|
||||
|
||||
// count of chunks that hold raw data
|
||||
int64_t
|
||||
num_chunk_data(FieldId field_id) const final {
|
||||
auto size = get_insert_record().ack_responder_.GetAck();
|
||||
return upper_div(size, segcore_config_.get_chunk_rows());
|
||||
}
|
||||
|
||||
// deprecated
|
||||
const knowhere::Index*
|
||||
chunk_index_impl(FieldId field_id, int64_t chunk_id) const final {
|
||||
|
||||
@ -133,6 +133,10 @@ class SegmentInternalInterface : public SegmentInterface {
|
||||
virtual int64_t
|
||||
num_chunk_index(FieldId field_id) const = 0;
|
||||
|
||||
// count of chunks that hold raw data
|
||||
virtual int64_t
|
||||
num_chunk_data(FieldId field_id) const = 0;
|
||||
|
||||
virtual void
|
||||
mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const = 0;
|
||||
|
||||
|
||||
@ -80,6 +80,7 @@ void
|
||||
SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
||||
// NOTE: lock only when data is ready to avoid starvation
|
||||
auto field_id = FieldId(info.field_id);
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
auto index = std::dynamic_pointer_cast<knowhere::VecIndex>(info.index);
|
||||
AssertInfo(info.index_params.count("metric_type"), "Can't get metric_type in index_params");
|
||||
@ -88,8 +89,11 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
||||
AssertInfo(row_count > 0, "Index count is 0");
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
AssertInfo(!get_bit(vecindex_ready_bitset_, field_id),
|
||||
"Can't get bitset element at " + std::to_string(field_id.get()));
|
||||
// Don't allow vector raw data and index exist at the same time
|
||||
AssertInfo(!get_bit(field_data_ready_bitset_, field_id),
|
||||
"vector index can't be loaded when raw data exists at field " + std::to_string(field_id.get()));
|
||||
AssertInfo(!get_bit(index_ready_bitset_, field_id),
|
||||
"vector index has been exist at " + std::to_string(field_id.get()));
|
||||
if (row_count_opt_.has_value()) {
|
||||
AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns");
|
||||
} else {
|
||||
@ -98,7 +102,8 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
||||
AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready");
|
||||
vector_indexings_.append_field_indexing(field_id, GetMetricType(metric_type_str), index);
|
||||
|
||||
set_bit(vecindex_ready_bitset_, field_id, true);
|
||||
set_bit(index_ready_bitset_, field_id, true);
|
||||
update_row_count(row_count);
|
||||
lck.unlock();
|
||||
}
|
||||
|
||||
@@ -106,22 +111,52 @@
SegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) {
    // NOTE: lock only when data is ready to avoid starvation
    auto field_id = FieldId(info.field_id);
    auto& field_meta = schema_->operator[](field_id);

    auto index = std::dynamic_pointer_cast<scalar::IndexBase>(info.index);
    auto row_count = index->Count();
    AssertInfo(row_count > 0, "Index count is 0");

    std::unique_lock lck(mutex_);

    // Don't allow scalar raw data and index to exist at the same time
    AssertInfo(!get_bit(field_data_ready_bitset_, field_id),
               "scalar index can't be loaded when raw data exists at field " + std::to_string(field_id.get()));
    AssertInfo(!get_bit(index_ready_bitset_, field_id),
               "scalar index has been exist at " + std::to_string(field_id.get()));
    if (row_count_opt_.has_value()) {
        AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns");
    } else {
        row_count_opt_ = row_count;
    }

    scalar_indexings_[field_id] = std::move(index);
    scalar_indexings_[field_id] = index;
    // reverse pks from the scalar index and map them to row offsets
    if (schema_->get_primary_field_id() == field_id) {
        AssertInfo(field_id.get() != -1, "Primary key is -1");
        AssertInfo(pk2offset_.empty(), "already exists");
        switch (field_meta.get_data_type()) {
            case DataType::INT64: {
                auto int64_index = std::dynamic_pointer_cast<scalar::ScalarIndex<int64_t>>(info.index);
                for (int i = 0; i < row_count; ++i) {
                    pk2offset_.insert(std::make_pair(int64_index->Reverse_Lookup(i), i));
                }
                break;
            }
            case DataType::VARCHAR: {
                auto string_index = std::dynamic_pointer_cast<scalar::ScalarIndex<std::string>>(info.index);
                for (int i = 0; i < row_count; ++i) {
                    pk2offset_.insert(std::make_pair(string_index->Reverse_Lookup(i), i));
                }
                break;
            }
            default: {
                PanicInfo("unsupported primary key type");
            }
        }
    }

    set_bit(field_data_ready_bitset_, field_id, true);
    set_bit(index_ready_bitset_, field_id, true);
    update_row_count(row_count);
    lck.unlock();
}
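Editor's note: since the raw primary-key column may no longer be loaded, the code above rebuilds the pk-to-offset map by walking the scalar index. The snippet below is a self-contained sketch of that idea (standard library only; ToyInt64Index and the sample pk values stand in for scalar::ScalarIndex<int64_t> and real data).

#include <cstdint>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

// Minimal stand-in for a loaded int64 scalar index.
struct ToyInt64Index {
    std::vector<int64_t> pks;
    int64_t Count() const { return static_cast<int64_t>(pks.size()); }
    int64_t Reverse_Lookup(int64_t i) const { return pks[i]; }
};

int main() {
    ToyInt64Index index{{42, 7, 19}};

    // pk -> row offset, filled purely from the index, no raw pk column needed.
    std::multimap<int64_t, int64_t> pk2offset;
    for (int64_t i = 0; i < index.Count(); ++i) {
        pk2offset.insert(std::make_pair(index.Reverse_Lookup(i), i));
    }

    std::cout << "offset of pk 7: " << pk2offset.find(7)->second << "\n";  // prints 1
    return 0;
}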
|
||||
|
||||
@ -167,12 +202,15 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
|
||||
auto data_type = field_meta.get_data_type();
|
||||
AssertInfo(data_type == DataType(info.field_data->type()),
|
||||
"field type of load data is inconsistent with the schema");
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data->empty(), "already exists");
|
||||
|
||||
// write data under lock
|
||||
std::unique_lock lck(mutex_);
|
||||
|
||||
// Don't allow raw data and index exist at the same time
|
||||
AssertInfo(!get_bit(index_ready_bitset_, field_id), "field data can't be loaded when indexing exists");
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data->empty(), "already exists");
|
||||
|
||||
// insert data to insertRecord
|
||||
field_data->fill_chunk_data(size, info.field_data, field_meta);
|
||||
AssertInfo(field_data->num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
@ -188,15 +226,6 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
|
||||
}
|
||||
}
|
||||
|
||||
if (field_meta.is_vector()) {
|
||||
AssertInfo(!vector_indexings_.is_ready(field_id), "field data can't be loaded when indexing exists");
|
||||
} else if (!scalar_indexings_.count(field_id)) {
|
||||
// generate scalar index
|
||||
std::unique_ptr<knowhere::Index> index;
|
||||
index = query::generate_scalar_index(field_data->get_span_base(0), data_type);
|
||||
scalar_indexings_[field_id] = std::move(index);
|
||||
}
|
||||
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
}
|
||||
update_row_count(info.row_count);
|
||||
@ -224,9 +253,22 @@ SegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
|
||||
deleted_record_.record_size_ = size;
|
||||
}
|
||||
|
||||
// internal API: support scalar index only
|
||||
int64_t
|
||||
SegmentSealedImpl::num_chunk_index(FieldId field_id) const {
|
||||
return 1;
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
if (field_meta.is_vector()) {
|
||||
return int64_t(vector_indexings_.is_ready(field_id));
|
||||
}
|
||||
|
||||
return scalar_indexings_.count(field_id);
|
||||
}
|
||||
|
||||
int64_t
|
||||
SegmentSealedImpl::num_chunk_data(FieldId field_id) const {
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data != nullptr, "null field data ptr");
|
||||
return field_data->num_chunk();
|
||||
}
|
||||
|
||||
int64_t
|
||||
@ -253,8 +295,6 @@ SegmentSealedImpl::chunk_data_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
|
||||
const knowhere::Index*
|
||||
SegmentSealedImpl::chunk_index_impl(FieldId field_id, int64_t chunk_id) const {
|
||||
AssertInfo(chunk_id == 0, "Chunk_id is not equal to 0");
|
||||
// TODO: support scalar index
|
||||
auto ptr = scalar_indexings_.at(field_id).get();
|
||||
AssertInfo(ptr, "Scalar index of " + std::to_string(field_id.get()) + " is null");
|
||||
return ptr;
|
||||
@ -308,7 +348,7 @@ SegmentSealedImpl::vector_search(int64_t vec_count,
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
AssertInfo(field_meta.is_vector(), "The meta type of vector field is not vector type");
|
||||
if (get_bit(vecindex_ready_bitset_, field_id)) {
|
||||
if (get_bit(index_ready_bitset_, field_id)) {
|
||||
AssertInfo(vector_indexings_.is_ready(field_id),
|
||||
"vector indexes isn't ready for field " + std::to_string(field_id.get()));
|
||||
query::SearchOnSealed(*schema_, vector_indexings_, search_info, query_data, query_count, bitset, output, id_);
|
||||
@ -383,7 +423,7 @@ SegmentSealedImpl::DropIndex(const FieldId field_id) {
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
vector_indexings_.drop_field_indexing(field_id);
|
||||
set_bit(vecindex_ready_bitset_, field_id, false);
|
||||
set_bit(index_ready_bitset_, field_id, false);
|
||||
}
|
||||
|
||||
void
|
||||
@ -396,7 +436,7 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const {
|
||||
}
|
||||
|
||||
auto& request_fields = plan->extra_info_opt_.value().involved_fields_;
|
||||
auto field_ready_bitset = field_data_ready_bitset_ | vecindex_ready_bitset_;
|
||||
auto field_ready_bitset = field_data_ready_bitset_ | index_ready_bitset_;
|
||||
AssertInfo(request_fields.size() == field_ready_bitset.size(),
|
||||
"Request fields size not equal to field ready bitset size when check search");
|
||||
auto absent_fields = request_fields - field_ready_bitset;
|
||||
@ -412,7 +452,7 @@ SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id)
|
||||
: schema_(schema),
|
||||
insert_record_(*schema, MAX_ROW_COUNT),
|
||||
field_data_ready_bitset_(schema->size()),
|
||||
vecindex_ready_bitset_(schema->size()),
|
||||
index_ready_bitset_(schema->size()),
|
||||
scalar_indexings_(schema->size()),
|
||||
id_(segment_id) {
|
||||
}
|
||||
@ -509,13 +549,26 @@ SegmentSealedImpl::fill_with_empty(FieldId field_id, int64_t count) const {
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
SegmentSealedImpl::bulk_subscript(FieldId field_id, const int64_t* seg_offsets, int64_t count) const {
|
||||
if (!HasFieldData(field_id)) {
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
// if count == 0, return empty data array
|
||||
if (count == 0) {
|
||||
return fill_with_empty(field_id, count);
|
||||
}
|
||||
|
||||
if (HasIndex(field_id)) {
|
||||
// if the field has a loaded scalar index, reverse the raw data from the index
|
||||
if (!datatype_is_vector(field_meta.get_data_type())) {
|
||||
AssertInfo(num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
|
||||
auto index = chunk_index_impl(field_id, 0);
|
||||
return ReverseDataFromIndex(index, seg_offsets, count, field_meta);
|
||||
}
|
||||
|
||||
// TODO: knowhere support reverse data from vector index
|
||||
// Now, real data will be filled in data array using chunk manager
|
||||
return fill_with_empty(field_id, count);
|
||||
}
|
||||
|
||||
Assert(get_bit(field_data_ready_bitset_, field_id));
|
||||
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
auto field_data = insert_record_.get_field_data_base(field_id);
|
||||
AssertInfo(field_data->num_chunk() == 1, std::string("num chunk not equal to 1 for sealed segment, num_chunk: ") +
|
||||
std::to_string(field_data->num_chunk()));
|
||||
@ -580,7 +633,7 @@ SegmentSealedImpl::HasIndex(FieldId field_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
AssertInfo(!SystemProperty::Instance().IsSystem(field_id),
|
||||
"Field id:" + std::to_string(field_id.get()) + " isn't one of system type when drop index");
|
||||
return get_bit(vecindex_ready_bitset_, field_id);
|
||||
return get_bit(index_ready_bitset_, field_id);
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@ -64,6 +64,10 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
int64_t
|
||||
num_chunk_index(FieldId field_id) const override;
|
||||
|
||||
// count of chunks that hold raw data
|
||||
int64_t
|
||||
num_chunk_data(FieldId field_id) const override;
|
||||
|
||||
int64_t
|
||||
num_chunk() const override;
|
||||
|
||||
@ -168,7 +172,7 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
private:
|
||||
// segment loading state
|
||||
BitsetType field_data_ready_bitset_;
|
||||
BitsetType vecindex_ready_bitset_;
|
||||
BitsetType index_ready_bitset_;
|
||||
std::atomic<int> system_ready_count_ = 0;
|
||||
// segment datas
|
||||
|
||||
@ -188,7 +192,6 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
|
||||
// pks to row offset
|
||||
Pk2OffsetType pk2offset_;
|
||||
// std::unique_ptr<ScalarIndexBase> primary_key_index_;
|
||||
|
||||
SchemaPtr schema_;
|
||||
int64_t id_;
|
||||
|
||||
@@ -9,7 +9,8 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#include "Utils.h"
#include "segcore/Utils.h"
#include "index/ScalarIndex.h"

namespace milvus::segcore {

@@ -256,6 +257,115 @@ MergeDataArray(std::vector<std::pair<milvus::SearchResult*, int64_t>>& result_of
    return data_array;
}

// TODO: split scalar IndexBase with knowhere::Index
std::unique_ptr<DataArray>
ReverseDataFromIndex(const knowhere::Index* index,
                     const int64_t* seg_offsets,
                     int64_t count,
                     const FieldMeta& field_meta) {
    auto data_type = field_meta.get_data_type();
    auto data_array = std::make_unique<DataArray>();
    data_array->set_field_id(field_meta.get_id().get());
    data_array->set_type(milvus::proto::schema::DataType(field_meta.get_data_type()));

    auto scalar_array = data_array->mutable_scalars();
    switch (data_type) {
        case DataType::BOOL: {
            using IndexType = scalar::ScalarIndex<bool>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<bool> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_bool_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::INT8: {
            using IndexType = scalar::ScalarIndex<int8_t>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int8_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_int_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::INT16: {
            using IndexType = scalar::ScalarIndex<int16_t>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int16_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_int_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::INT32: {
            using IndexType = scalar::ScalarIndex<int32_t>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int32_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_int_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::INT64: {
            using IndexType = scalar::ScalarIndex<int64_t>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<int64_t> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_long_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::FLOAT: {
            using IndexType = scalar::ScalarIndex<float>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<float> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_float_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::DOUBLE: {
            using IndexType = scalar::ScalarIndex<double>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<double> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_double_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        case DataType::VARCHAR: {
            using IndexType = scalar::ScalarIndex<std::string>;
            auto ptr = dynamic_cast<const IndexType*>(index);
            std::vector<std::string> raw_data(count);
            for (int64_t i = 0; i < count; ++i) {
                raw_data[i] = ptr->Reverse_Lookup(seg_offsets[i]);
            }
            auto obj = scalar_array->mutable_string_data();
            *(obj->mutable_data()) = {raw_data.begin(), raw_data.end()};
            break;
        }
        default: {
            PanicInfo("unsupported datatype");
        }
    }

    return data_array;
}
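Editor's note: one observation on the block above is that the per-type cases differ only in the element type and the protobuf field they fill, so the reverse loop itself could be factored into a small template. The sketch below shows just that loop in isolation; it is a suggestion, not what the commit does, and ScalarIndex here is a hypothetical stand-in for scalar::ScalarIndex<T> (standard library only).

#include <cstdint>
#include <vector>

// Hypothetical stand-in for a loaded scalar index of element type T.
template <typename T>
struct ScalarIndex {
    std::vector<T> values;
    T Reverse_Lookup(int64_t offset) const { return values[offset]; }
};

// The common inner loop of ReverseDataFromIndex, independent of the data type.
template <typename T>
std::vector<T>
reverse_raw_data(const ScalarIndex<T>& index, const int64_t* seg_offsets, int64_t count) {
    std::vector<T> raw(count);
    for (int64_t i = 0; i < count; ++i) {
        raw[i] = index.Reverse_Lookup(seg_offsets[i]);
    }
    return raw;
}

int main() {
    ScalarIndex<double> index{{1.5, 2.5, 3.5}};
    int64_t offsets[] = {2, 0};
    auto raw = reverse_raw_data(index, offsets, 2);     // {3.5, 1.5}
    return static_cast<int>(raw.size()) - 2;            // 0 on success
}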
|
||||
|
||||
// insert_barrier means the number of inserted rows in a segment
|
||||
// del_barrier means that if the pk of the insert data is in delete record[0 : del_barrier]
|
||||
// then the data corresponding to this pk may be ignored when searching/querying
|
||||
|
||||
@ -17,9 +17,10 @@
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
#include <knowhere/common/MetricType.h>
|
||||
#include "knowhere/index/Index.h"
|
||||
#include "common/QueryResult.h"
|
||||
#include "DeletedRecord.h"
|
||||
#include "InsertRecord.h"
|
||||
#include "segcore/DeletedRecord.h"
|
||||
#include "segcore/InsertRecord.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -89,4 +90,10 @@ get_deleted_bitmap(int64_t del_barrier,
|
||||
const Pk2OffsetType& pk2offset,
|
||||
Timestamp query_timestamp);
|
||||
|
||||
std::unique_ptr<DataArray>
|
||||
ReverseDataFromIndex(const knowhere::Index* index,
|
||||
const int64_t* seg_offsets,
|
||||
int64_t count,
|
||||
const FieldMeta& field_meta);
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
||||
@ -95,14 +95,14 @@ Search_Sealed(benchmark::State& state) {
|
||||
auto dataset_ = DataGen(schema, N);
|
||||
return dataset_;
|
||||
}();
|
||||
SealedLoader(dataset_, *segment);
|
||||
SealedLoadFieldData(dataset_, *segment);
|
||||
auto choice = state.range(0);
|
||||
if (choice == 0) {
|
||||
// Brute Force
|
||||
} else if (choice == 1) {
|
||||
// ivf
|
||||
auto vec = dataset_.get_col<float>(milvus::FieldId(100));
|
||||
auto indexing = GenIndexing(N, dim, vec.data());
|
||||
auto indexing = GenVecIndexing(N, dim, vec.data());
|
||||
LoadIndexInfo info;
|
||||
info.index = indexing;
|
||||
info.field_id = (*schema)[FieldName("fakevec")].get_id().get();
|
||||
|
||||
@ -66,22 +66,6 @@ get_default_schema_config() {
|
||||
return conf.c_str();
|
||||
}
|
||||
|
||||
std::vector<char>
|
||||
translate_text_plan_to_binary_plan(const char* text_plan) {
|
||||
proto::plan::PlanNode plan_node;
|
||||
auto ok = google::protobuf::TextFormat::ParseFromString(text_plan, &plan_node);
|
||||
AssertInfo(ok, "Failed to parse");
|
||||
|
||||
std::string binary_plan;
|
||||
plan_node.SerializeToString(&binary_plan);
|
||||
|
||||
std::vector<char> ret;
|
||||
ret.resize(binary_plan.size());
|
||||
std::memcpy(ret.data(), binary_plan.c_str(), binary_plan.size());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto
|
||||
generate_data(int N) {
|
||||
std::vector<char> raw_data;
|
||||
@ -1417,7 +1401,11 @@ TEST(CApiTest, Indexing_Without_Predicate) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -1538,7 +1526,11 @@ TEST(CApiTest, Indexing_Expr_Without_Predicate) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -1677,7 +1669,11 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -1830,7 +1826,11 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -1967,7 +1967,11 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -2113,7 +2117,11 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -2252,7 +2260,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::BinaryVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -2404,7 +2416,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::BinaryVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -2543,7 +2559,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::BinaryVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -2704,7 +2724,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::BinaryVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -2912,7 +2936,11 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
|
||||
status = LoadFieldData(segment, c_ts_field_data);
|
||||
assert(status.error_code == Success);
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, *(LoadIndexInfo*)c_load_index_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(FieldId(100));
|
||||
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index =
|
||||
Search(sealed_segment.get(), plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -3141,17 +3169,6 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
|
||||
|
||||
auto indexing = generate_index(vec_col.data(), conf, DIM, TOPK, N, IndexEnum::INDEX_FAISS_IVFPQ);
|
||||
|
||||
// gen query dataset
|
||||
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
|
||||
auto result_on_index = indexing->Query(query_dataset, conf, nullptr);
|
||||
auto ids = result_on_index->Get<int64_t*>(knowhere::meta::IDS);
|
||||
auto dis = result_on_index->Get<float*>(knowhere::meta::DISTANCE);
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
vec_dis.push_back(dis[j] * -1);
|
||||
}
|
||||
|
||||
auto binary_set = indexing->Serialize(conf);
|
||||
void* c_load_index_info = nullptr;
|
||||
status = NewLoadIndexInfo(&c_load_index_info);
|
||||
@ -3169,13 +3186,17 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
|
||||
AppendFieldInfo(c_load_index_info, 100, CDataType::FloatVector);
|
||||
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
|
||||
|
||||
// load vec index
|
||||
auto load_index_info = (LoadIndexInfo*)c_load_index_info;
|
||||
auto query_dataset2 = knowhere::GenDataset(num_queries, DIM, query_ptr);
|
||||
auto index = std::dynamic_pointer_cast<knowhere::VecIndex>(load_index_info->index);
|
||||
auto result_on_index2 = index->Query(query_dataset2, conf, nullptr);
|
||||
auto ids2 = result_on_index2->Get<int64_t*>(knowhere::meta::IDS);
|
||||
auto dis2 = result_on_index2->Get<float*>(knowhere::meta::DISTANCE);
|
||||
status = UpdateSealedSegmentIndex(segment, c_load_index_info);
|
||||
assert(status.error_code == Success);
|
||||
|
||||
// load raw data
|
||||
auto c_counter_field_data = CLoadFieldDataInfo{
|
||||
101,
|
||||
counter_data.data(),
|
||||
@ -3203,24 +3224,16 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
|
||||
status = LoadFieldData(segment, c_ts_field_data);
|
||||
assert(status.error_code == Success);
|
||||
|
||||
status = UpdateSealedSegmentIndex(segment, c_load_index_info);
|
||||
assert(status.error_code == Success);
|
||||
|
||||
auto counter_index = GenScalarIndexing(N, counter_col.data());
|
||||
auto counter_index_binary_set = counter_index->Serialize(conf);
|
||||
CLoadIndexInfo counter_index_info = nullptr;
|
||||
status = NewLoadIndexInfo(&counter_index_info);
|
||||
assert(status.error_code == Success);
|
||||
status = AppendFieldInfo(counter_index_info, 101, CDataType::Int64);
|
||||
assert(status.error_code == Success);
|
||||
std::string counter_index_type_key = "index_type";
|
||||
std::string counter_index_type_value = "sort";
|
||||
status = AppendIndexParam(counter_index_info, counter_index_type_key.c_str(), counter_index_type_value.c_str());
|
||||
assert(status.error_code == Success);
|
||||
status = AppendIndex(counter_index_info, (CBinarySet)&counter_index_binary_set);
|
||||
assert(status.error_code == Success);
|
||||
status = UpdateSealedSegmentIndex(segment, counter_index_info);
|
||||
assert(status.error_code == Success);
|
||||
// gen query dataset
|
||||
auto query_dataset = knowhere::GenDataset(num_queries, DIM, query_ptr);
|
||||
auto result_on_index = indexing->Query(query_dataset, conf, nullptr);
|
||||
auto ids = result_on_index->Get<int64_t*>(knowhere::meta::IDS);
|
||||
auto dis = result_on_index->Get<float*>(knowhere::meta::DISTANCE);
|
||||
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
|
||||
std::vector<float> vec_dis;
|
||||
for (int j = 0; j < TOPK * num_queries; ++j) {
|
||||
vec_dis.push_back(dis[j] * -1);
|
||||
}
|
||||
|
||||
CSearchResult c_search_result_on_bigIndex;
|
||||
auto res_after_load_index = Search(segment, plan, placeholderGroup, time, &c_search_result_on_bigIndex, -1);
|
||||
@ -3239,3 +3252,157 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
|
||||
DeleteCollection(collection);
|
||||
DeleteSegment(segment);
|
||||
}
|
||||
|
||||
TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
|
||||
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
|
||||
auto float_fid = schema->AddDebugField("age_float", DataType::FLOAT);
|
||||
auto double_fid = schema->AddDebugField("age_double", DataType::DOUBLE);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto segment = CreateSealedSegment(schema).release();
|
||||
|
||||
int N = ROW_COUNT;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
LoadIndexInfo load_index_info;
|
||||
|
||||
// load timestamp field
|
||||
FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
auto ts_array = CreateScalarDataArrayFrom(raw_data.timestamps_.data(), N, ts_field_meta);
|
||||
auto ts_data = serialize(ts_array.get());
|
||||
auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N};
|
||||
auto res = LoadFieldData(segment, load_info);
|
||||
assert(res.error_code == Success);
|
||||
auto count = GetRowCount(segment);
|
||||
assert(count == N);
|
||||
|
||||
// load rowid field
|
||||
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
auto row_id_array = CreateScalarDataArrayFrom(raw_data.row_ids_.data(), N, row_id_field_meta);
|
||||
auto row_id_data = serialize(row_id_array.get());
|
||||
load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N};
|
||||
res = LoadFieldData(segment, load_info);
|
||||
assert(res.error_code == Success);
|
||||
count = GetRowCount(segment);
|
||||
assert(count == N);
|
||||
|
||||
// load index for int8 field
|
||||
auto age8_col = raw_data.get_col<int8_t>(i8_fid);
|
||||
GenScalarIndexing(N, age8_col.data());
|
||||
auto age8_index = milvus::scalar::CreateScalarIndexSort<int8_t>();
|
||||
age8_index->Build(N, age8_col.data());
|
||||
load_index_info.field_id = i8_fid.get();
|
||||
load_index_info.field_type = Int8;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int8_t>>(age8_index.release());
|
||||
segment->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int16 field
|
||||
auto age16_col = raw_data.get_col<int16_t>(i16_fid);
|
||||
GenScalarIndexing(N, age16_col.data());
|
||||
auto age16_index = milvus::scalar::CreateScalarIndexSort<int16_t>();
|
||||
age16_index->Build(N, age16_col.data());
|
||||
load_index_info.field_id = i16_fid.get();
|
||||
load_index_info.field_type = Int16;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int16_t>>(age16_index.release());
|
||||
segment->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int32 field
|
||||
auto age32_col = raw_data.get_col<int32_t>(i32_fid);
|
||||
GenScalarIndexing(N, age32_col.data());
|
||||
auto age32_index = milvus::scalar::CreateScalarIndexSort<int32_t>();
|
||||
age32_index->Build(N, age32_col.data());
|
||||
load_index_info.field_id = i32_fid.get();
|
||||
load_index_info.field_type = Int32;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int32_t>>(age32_index.release());
|
||||
segment->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int64 field
|
||||
auto age64_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
GenScalarIndexing(N, age64_col.data());
|
||||
auto age64_index = milvus::scalar::CreateScalarIndexSort<int64_t>();
|
||||
age64_index->Build(N, age64_col.data());
|
||||
load_index_info.field_id = i64_fid.get();
|
||||
load_index_info.field_type = Int64;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int64_t>>(age64_index.release());
|
||||
segment->LoadIndex(load_index_info);
|
||||
|
||||
// load index for float field
|
||||
auto age_float_col = raw_data.get_col<float>(float_fid);
|
||||
GenScalarIndexing(N, age_float_col.data());
|
||||
auto age_float_index = milvus::scalar::CreateScalarIndexSort<float>();
|
||||
age_float_index->Build(N, age_float_col.data());
|
||||
load_index_info.field_id = float_fid.get();
|
||||
load_index_info.field_type = Float;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<float>>(age_float_index.release());
|
||||
segment->LoadIndex(load_index_info);
|
||||
|
||||
// load index for double field
|
||||
auto age_double_col = raw_data.get_col<double>(double_fid);
|
||||
GenScalarIndexing(N, age_double_col.data());
|
||||
auto age_double_index = milvus::scalar::CreateScalarIndexSort<double>();
|
||||
age_double_index->Build(N, age_double_col.data());
|
||||
load_index_info.field_id = double_fid.get();
|
||||
load_index_info.field_type = Float;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<double>>(age_double_index.release());
|
||||
segment->LoadIndex(load_index_info);
|
||||
|
||||
// create retrieve plan
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
|
||||
std::vector<int64_t> retrive_row_ids = {age64_col[0]};
|
||||
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(i64_fid, DataType::INT64, retrive_row_ids);
|
||||
plan->plan_node_->predicate_ = std::move(term_expr);
|
||||
std::vector<FieldId> target_field_ids;
|
||||
|
||||
// retrieve value
|
||||
target_field_ids = {i8_fid, i16_fid, i32_fid, i64_fid, float_fid, double_fid};
|
||||
plan->field_ids_ = target_field_ids;
|
||||
|
||||
CRetrieveResult retrieve_result;
|
||||
res = Retrieve(segment, plan.get(), raw_data.timestamps_[N - 1], &retrieve_result);
|
||||
ASSERT_EQ(res.error_code, Success);
|
||||
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
|
||||
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob, retrieve_result.proto_size);
|
||||
ASSERT_TRUE(suc);
|
||||
ASSERT_EQ(query_result->fields_data().size(), 6);
|
||||
auto fields_data = query_result->fields_data();
|
||||
for (auto iter = fields_data.begin(); iter < fields_data.end(); ++iter) {
|
||||
switch (iter->type()) {
|
||||
case proto::schema::DataType::Int8: {
|
||||
ASSERT_EQ(iter->scalars().int_data().data(0), age8_col[0]);
|
||||
break;
|
||||
}
|
||||
case proto::schema::DataType::Int16: {
|
||||
ASSERT_EQ(iter->scalars().int_data().data(0), age16_col[0]);
|
||||
break;
|
||||
}
|
||||
case proto::schema::DataType::Int32: {
|
||||
ASSERT_EQ(iter->scalars().int_data().data(0), age32_col[0]);
|
||||
break;
|
||||
}
|
||||
case proto::schema::DataType::Int64: {
|
||||
ASSERT_EQ(iter->scalars().long_data().data(0), age64_col[0]);
|
||||
break;
|
||||
}
|
||||
case proto::schema::DataType::Float: {
|
||||
ASSERT_EQ(iter->scalars().float_data().data(0), age_float_col[0]);
|
||||
break;
|
||||
}
|
||||
case proto::schema::DataType::Double: {
|
||||
ASSERT_EQ(iter->scalars().double_data().data(0), age_double_col[0]);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
PanicInfo("not supported type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DeleteRetrievePlan(plan.release());
|
||||
DeleteRetrieveResult(&retrieve_result);
|
||||
|
||||
DeleteSegment(segment);
|
||||
}
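What this test exercises, in outline: no raw scalar columns are loaded, so the only way the segment can fill the retrieve result is to map each selected row offset back to a value through the scalar index. A minimal sketch of that idea; the Reverse_Lookup name comes from this change, while ScalarIndexView and fill_field_from_index are hypothetical illustration-only helpers, not segcore's actual code path:

#include <cstddef>
#include <cstdint>
#include <vector>

// Minimal view of the capability this PR adds: a scalar index that can map
// a row offset back to the original value.
template <typename T>
struct ScalarIndexView {
    virtual ~ScalarIndexView() = default;
    virtual T Reverse_Lookup(size_t offset) const = 0;
};

// Hypothetical helper: fill one output column for the rows selected by a
// retrieve plan, without touching raw field data.
template <typename T>
std::vector<T>
fill_field_from_index(const ScalarIndexView<T>& index, const std::vector<int64_t>& offsets) {
    std::vector<T> column;
    column.reserve(offsets.size());
    for (auto offset : offsets) {
        column.push_back(index.Reverse_Lookup(static_cast<size_t>(offset)));
    }
    return column;
}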
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
#include "query/generated/ExecExprVisitor.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include "index/IndexFactory.h"
|
||||
|
||||
using namespace milvus;
|
||||
|
||||
@ -583,6 +584,183 @@ TEST(Expr, TestCompare) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Expr, TestCompareWithScalarIndex) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int, int64_t)>>> testcases = {
|
||||
{R"(LessThan)", [](int a, int64_t b) { return a < b; }},
|
||||
{R"(LessEqual)", [](int a, int64_t b) { return a <= b; }},
|
||||
{R"(GreaterThan)", [](int a, int64_t b) { return a > b; }},
|
||||
{R"(GreaterEqual)", [](int a, int64_t b) { return a >= b; }},
|
||||
{R"(Equal)", [](int a, int64_t b) { return a == b; }},
|
||||
{R"(NotEqual)", [](int a, int64_t b) { return a != b; }},
|
||||
};
|
||||
|
||||
std::string serialized_expr_plan = R"(vector_anns: <
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
compare_expr: <
|
||||
left_column_info: <
|
||||
field_id: %3%
|
||||
data_type: %4%
|
||||
>
|
||||
right_column_info: <
|
||||
field_id: %5%
|
||||
data_type: %6%
|
||||
>
|
||||
op: %2%
|
||||
>
|
||||
>
|
||||
query_info: <
|
||||
topk: 10
|
||||
round_decimal: 3
|
||||
metric_type: "L2"
|
||||
search_params: "{\"nprobe\": 10}"
|
||||
>
|
||||
placeholder_tag: "$0"
|
||||
>)";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateSealedSegment(schema);
|
||||
int N = 1000;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
LoadIndexInfo load_index_info;
|
||||
|
||||
// load index for int32 field
|
||||
auto age32_col = raw_data.get_col<int32_t>(i32_fid);
|
||||
age32_col[0] = 1000;
|
||||
GenScalarIndexing(N, age32_col.data());
|
||||
auto age32_index = milvus::scalar::CreateScalarIndexSort<int32_t>();
|
||||
age32_index->Build(N, age32_col.data());
|
||||
load_index_info.field_id = i32_fid.get();
|
||||
load_index_info.field_type = Int32;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int32_t>>(age32_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int64 field
|
||||
auto age64_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
age64_col[0] = 2000;
|
||||
GenScalarIndexing(N, age64_col.data());
|
||||
auto age64_index = milvus::scalar::CreateScalarIndexSort<int64_t>();
|
||||
age64_index->Build(N, age64_col.data());
|
||||
load_index_info.field_id = i64_fid.get();
|
||||
load_index_info.field_type = Int64;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int64_t>>(age64_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
ExecExprVisitor visitor(*seg, seg->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto dsl_string = boost::format(serialized_expr_plan) % vec_fid.get() % clause % i32_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT32)) % i64_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT64));
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(dsl_string.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
// std::cout << ShowPlanNodeVisitor().call_child(*plan->plan_node_) << std::endl;
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N);
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
auto ans = final[i];
|
||||
auto val1 = age32_col[i];
|
||||
auto val2 = age64_col[i];
|
||||
auto ref = ref_func(val1, val2);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
|
||||
}
|
||||
}
|
||||
}
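The reference loop above spells out what the compare expression must produce once both operand columns are reachable only through their scalar indexes: for every row offset, fetch each side by reverse lookup and apply the comparison. ExecExprVisitor presumably does the segment-internal equivalent; the helper below is a rough, self-contained sketch of the evaluation, not the visitor's code:

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Hypothetical helper: row-wise comparison of two indexed columns via
// reverse lookup of both operands per offset.
template <typename L, typename R, typename LeftIndex, typename RightIndex>
std::vector<bool>
evaluate_compare(const LeftIndex& left, const RightIndex& right, size_t row_count,
                 const std::function<bool(L, R)>& op) {
    std::vector<bool> bitset(row_count);
    for (size_t offset = 0; offset < row_count; ++offset) {
        bitset[offset] = op(left.Reverse_Lookup(offset), right.Reverse_Lookup(offset));
    }
    return bitset;
}

// Usage mirroring the LessThan case (i32_index / i64_index are placeholders):
// auto bits = evaluate_compare<int32_t, int64_t>(*i32_index, *i64_index, N,
//                 [](int32_t a, int64_t b) { return a < b; });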
|
||||
|
||||
TEST(Expr, TestCompareWithScalarIndexMaris) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(std::string, std::string)>>> testcases = {
|
||||
{R"(LessThan)", [](std::string a, std::string b) { return a.compare(b) < 0; }},
|
||||
{R"(LessEqual)", [](std::string a, std::string b) { return a.compare(b) <= 0; }},
|
||||
{R"(GreaterThan)", [](std::string a, std::string b) { return a.compare(b) > 0; }},
|
||||
{R"(GreaterEqual)", [](std::string a, std::string b) { return a.compare(b) >= 0; }},
|
||||
{R"(Equal)", [](std::string a, std::string b) { return a.compare(b) == 0; }},
|
||||
{R"(NotEqual)", [](std::string a, std::string b) { return a.compare(b) != 0; }},
|
||||
};
|
||||
|
||||
const char* serialized_expr_plan = R"(vector_anns: <
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
compare_expr: <
|
||||
left_column_info: <
|
||||
field_id: %3%
|
||||
data_type: VarChar
|
||||
>
|
||||
right_column_info: <
|
||||
field_id: %4%
|
||||
data_type: VarChar
|
||||
>
|
||||
op: %2%
|
||||
>
|
||||
>
|
||||
query_info: <
|
||||
topk: 10
|
||||
round_decimal: 3
|
||||
metric_type: "L2"
|
||||
search_params: "{\"nprobe\": 10}"
|
||||
>
|
||||
placeholder_tag: "$0"
|
||||
>)";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto str1_fid = schema->AddDebugField("string1", DataType::VARCHAR);
|
||||
auto str2_fid = schema->AddDebugField("string2", DataType::VARCHAR);
|
||||
schema->set_primary_field_id(str1_fid);
|
||||
|
||||
auto seg = CreateSealedSegment(schema);
|
||||
int N = 1000;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
LoadIndexInfo load_index_info;
|
||||
|
||||
// load index for string1 field
|
||||
auto str1_col = raw_data.get_col<std::string>(str1_fid);
|
||||
GenScalarIndexing(N, str1_col.data());
|
||||
auto str1_index = milvus::scalar::CreateScalarIndexSort<std::string>();
|
||||
str1_index->Build(N, str1_col.data());
|
||||
load_index_info.field_id = str1_fid.get();
|
||||
load_index_info.field_type = VarChar;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<std::string>>(str1_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for string2 field
|
||||
auto str2_col = raw_data.get_col<std::string>(str2_fid);
|
||||
GenScalarIndexing(N, str2_col.data());
|
||||
auto str2_index = milvus::scalar::CreateScalarIndexSort<std::string>();
|
||||
str2_index->Build(N, str2_col.data());
|
||||
load_index_info.field_id = str2_fid.get();
|
||||
load_index_info.field_type = VarChar;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<std::string>>(str2_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
ExecExprVisitor visitor(*seg, seg->get_row_count(), MAX_TIMESTAMP);
|
||||
for (auto [clause, ref_func] : testcases) {
|
||||
auto dsl_string =
|
||||
boost::format(serialized_expr_plan) % vec_fid.get() % clause % str1_fid.get() % str2_fid.get();
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(dsl_string.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
// std::cout << ShowPlanNodeVisitor().call_child(*plan->plan_node_) << std::endl;
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N);
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
auto ans = final[i];
|
||||
auto val1 = str1_col[i];
|
||||
auto val2 = str2_col[i];
|
||||
auto ref = ref_func(val1, val2);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << boost::format("[%1%, %2%]") % val1 % val2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Expr, TestBinaryArithOpEvalRange) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
@ -960,3 +1138,302 @@ TEST(Expr, TestBinaryArithOpEvalRangeExceptions) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Expr, TestBinaryArithOpEvalRangeWithScalarSortIndex) {
|
||||
using namespace milvus::query;
|
||||
using namespace milvus::segcore;
|
||||
std::vector<std::tuple<std::string, std::function<bool(int)>, DataType>> testcases = {
|
||||
// Add test cases for BinaryArithOpEvalRangeExpr EQ of various data types
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
int64_val: 4
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
int64_val: 8
|
||||
>)",
|
||||
[](int8_t v) { return (v + 4) == 8; }, DataType::INT8},
|
||||
{R"(arith_op: Sub
|
||||
right_operand: <
|
||||
int64_val: 500
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
int64_val: 1500
|
||||
>)",
|
||||
[](int16_t v) { return (v - 500) == 1500; }, DataType::INT16},
|
||||
{R"(arith_op: Mul
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
int64_val: 4000
|
||||
>)",
|
||||
[](int32_t v) { return (v * 2) == 4000; }, DataType::INT32},
|
||||
{R"(arith_op: Div
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
int64_val: 1000
|
||||
>)",
|
||||
[](int64_t v) { return (v / 2) == 1000; }, DataType::INT64},
|
||||
{R"(arith_op: Mod
|
||||
right_operand: <
|
||||
int64_val: 100
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
int64_val: 0
|
||||
>)",
|
||||
[](int32_t v) { return (v % 100) == 0; }, DataType::INT32},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
float_val: 2500
|
||||
>)",
|
||||
[](float v) { return (v + 500) == 2500; }, DataType::FLOAT},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
op: Equal
|
||||
value: <
|
||||
float_val: 2500
|
||||
>)",
|
||||
[](double v) { return (v + 500) == 2500; }, DataType::DOUBLE},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
op: NotEqual
|
||||
value: <
|
||||
float_val: 2000
|
||||
>)",
|
||||
[](float v) { return (v + 500) != 2000; }, DataType::FLOAT},
|
||||
{R"(arith_op: Sub
|
||||
right_operand: <
|
||||
float_val: 500
|
||||
>
|
||||
op: NotEqual
|
||||
value: <
|
||||
float_val: 2500
|
||||
>)",
|
||||
[](double v) { return (v - 500) != 2000; }, DataType::DOUBLE},
|
||||
{R"(arith_op: Mul
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
op: NotEqual
|
||||
value: <
|
||||
int64_val: 2
|
||||
>)",
|
||||
[](int8_t v) { return (v * 2) != 2; }, DataType::INT8},
|
||||
{R"(arith_op: Div
|
||||
right_operand: <
|
||||
int64_val: 2
|
||||
>
|
||||
op: NotEqual
|
||||
value: <
|
||||
int64_val: 2000
|
||||
>)",
|
||||
[](int16_t v) { return (v / 2) != 2000; }, DataType::INT16},
|
||||
{R"(arith_op: Mod
|
||||
right_operand: <
|
||||
int64_val: 100
|
||||
>
|
||||
op: NotEqual
|
||||
value: <
|
||||
int64_val: 1
|
||||
>)",
|
||||
[](int32_t v) { return (v % 100) != 1; }, DataType::INT32},
|
||||
{R"(arith_op: Add
|
||||
right_operand: <
|
||||
int64_val: 500
|
||||
>
|
||||
op: NotEqual
|
||||
value: <
|
||||
int64_val: 2000
|
||||
>)",
|
||||
[](int64_t v) { return (v + 500) != 2000; }, DataType::INT64},
|
||||
};
|
||||
|
||||
std::string serialized_expr_plan = R"(vector_anns: <
|
||||
field_id: %1%
|
||||
predicates: <
|
||||
binary_arith_op_eval_range_expr: <
|
||||
@@@@@
|
||||
>
|
||||
>
|
||||
query_info: <
|
||||
topk: 10
|
||||
round_decimal: 3
|
||||
metric_type: "L2"
|
||||
search_params: "{\"nprobe\": 10}"
|
||||
>
|
||||
placeholder_tag: "$0"
|
||||
>)";
|
||||
|
||||
std::string arith_expr = R"(
|
||||
column_info: <
|
||||
field_id: %2%
|
||||
data_type: %3%
|
||||
>
|
||||
@@@@)";
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto vec_fid = schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
|
||||
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
|
||||
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
|
||||
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
|
||||
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
|
||||
auto float_fid = schema->AddDebugField("age_float", DataType::FLOAT);
|
||||
auto double_fid = schema->AddDebugField("age_double", DataType::DOUBLE);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
auto seg = CreateSealedSegment(schema);
|
||||
int N = 1000;
|
||||
auto raw_data = DataGen(schema, N);
|
||||
LoadIndexInfo load_index_info;
|
||||
|
||||
// load index for int8 field
|
||||
auto age8_col = raw_data.get_col<int8_t>(i8_fid);
|
||||
age8_col[0] = 4;
|
||||
GenScalarIndexing(N, age8_col.data());
|
||||
auto age8_index = milvus::scalar::CreateScalarIndexSort<int8_t>();
|
||||
age8_index->Build(N, age8_col.data());
|
||||
load_index_info.field_id = i8_fid.get();
|
||||
load_index_info.field_type = Int8;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int8_t>>(age8_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int16 field
|
||||
auto age16_col = raw_data.get_col<int16_t>(i16_fid);
|
||||
age16_col[0] = 2000;
|
||||
GenScalarIndexing(N, age16_col.data());
|
||||
auto age16_index = milvus::scalar::CreateScalarIndexSort<int16_t>();
|
||||
age16_index->Build(N, age16_col.data());
|
||||
load_index_info.field_id = i16_fid.get();
|
||||
load_index_info.field_type = Int16;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int16_t>>(age16_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int32 field
|
||||
auto age32_col = raw_data.get_col<int32_t>(i32_fid);
|
||||
age32_col[0] = 2000;
|
||||
GenScalarIndexing(N, age32_col.data());
|
||||
auto age32_index = milvus::scalar::CreateScalarIndexSort<int32_t>();
|
||||
age32_index->Build(N, age32_col.data());
|
||||
load_index_info.field_id = i32_fid.get();
|
||||
load_index_info.field_type = Int32;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int32_t>>(age32_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for int64 field
|
||||
auto age64_col = raw_data.get_col<int64_t>(i64_fid);
|
||||
age64_col[0] = 2000;
|
||||
GenScalarIndexing(N, age64_col.data());
|
||||
auto age64_index = milvus::scalar::CreateScalarIndexSort<int64_t>();
|
||||
age64_index->Build(N, age64_col.data());
|
||||
load_index_info.field_id = i64_fid.get();
|
||||
load_index_info.field_type = Int64;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<int64_t>>(age64_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for float field
|
||||
auto age_float_col = raw_data.get_col<float>(float_fid);
|
||||
age_float_col[0] = 2000;
|
||||
GenScalarIndexing(N, age_float_col.data());
|
||||
auto age_float_index = milvus::scalar::CreateScalarIndexSort<float>();
|
||||
age_float_index->Build(N, age_float_col.data());
|
||||
load_index_info.field_id = float_fid.get();
|
||||
load_index_info.field_type = Float;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<float>>(age_float_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
// load index for double field
|
||||
auto age_double_col = raw_data.get_col<double>(double_fid);
|
||||
age_double_col[0] = 2000;
|
||||
GenScalarIndexing(N, age_double_col.data());
|
||||
auto age_double_index = milvus::scalar::CreateScalarIndexSort<double>();
|
||||
age_double_index->Build(N, age_double_col.data());
|
||||
load_index_info.field_id = double_fid.get();
|
||||
load_index_info.field_type = Float;
|
||||
load_index_info.index = std::shared_ptr<milvus::scalar::ScalarIndexSort<double>>(age_double_index.release());
|
||||
seg->LoadIndex(load_index_info);
|
||||
|
||||
auto seg_promote = dynamic_cast<SegmentSealedImpl*>(seg.get());
|
||||
ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
|
||||
int offset = 0;
|
||||
for (auto [clause, ref_func, dtype] : testcases) {
|
||||
auto loc = serialized_expr_plan.find("@@@@@");
|
||||
auto expr_plan = serialized_expr_plan;
|
||||
expr_plan.replace(loc, 5, arith_expr);
|
||||
loc = expr_plan.find("@@@@");
|
||||
expr_plan.replace(loc, 4, clause);
|
||||
boost::format expr;
|
||||
if (dtype == DataType::INT8) {
|
||||
expr = boost::format(expr_plan) % vec_fid.get() % i8_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT8));
|
||||
} else if (dtype == DataType::INT16) {
|
||||
expr = boost::format(expr_plan) % vec_fid.get() % i16_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT16));
|
||||
} else if (dtype == DataType::INT32) {
|
||||
expr = boost::format(expr_plan) % vec_fid.get() % i32_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT32));
|
||||
} else if (dtype == DataType::INT64) {
|
||||
expr = boost::format(expr_plan) % vec_fid.get() % i64_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::INT64));
|
||||
} else if (dtype == DataType::FLOAT) {
|
||||
expr = boost::format(expr_plan) % vec_fid.get() % float_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::FLOAT));
|
||||
} else if (dtype == DataType::DOUBLE) {
|
||||
expr = boost::format(expr_plan) % vec_fid.get() % double_fid.get() %
|
||||
proto::schema::DataType_Name(int(DataType::DOUBLE));
|
||||
} else {
|
||||
ASSERT_TRUE(false) << "No test case defined for this data type";
|
||||
}
|
||||
|
||||
auto binary_plan = translate_text_plan_to_binary_plan(expr.str().data());
|
||||
auto plan = CreateSearchPlanByExpr(*schema, binary_plan.data(), binary_plan.size());
|
||||
|
||||
auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
|
||||
EXPECT_EQ(final.size(), N);
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
auto ans = final[i];
|
||||
if (dtype == DataType::INT8) {
|
||||
auto val = age8_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
|
||||
} else if (dtype == DataType::INT16) {
|
||||
auto val = age16_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
|
||||
} else if (dtype == DataType::INT32) {
|
||||
auto val = age32_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
|
||||
} else if (dtype == DataType::INT64) {
|
||||
auto val = age64_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
|
||||
} else if (dtype == DataType::FLOAT) {
|
||||
auto val = age_float_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
|
||||
} else if (dtype == DataType::DOUBLE) {
|
||||
auto val = age_double_col[i];
|
||||
auto ref = ref_func(val);
|
||||
ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
|
||||
} else {
|
||||
ASSERT_TRUE(false) << "No test case defined for this data type";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
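Each test case above pairs a textual binary_arith_op_eval_range_expr with the lambda it should be equivalent to: the predicate is always of the form (v <arith_op> right_operand) <op> value. A sketch of that equivalence for the integer cases, using a hypothetical builder rather than the planner's actual machinery:

#include <cstdint>
#include <functional>
#include <stdexcept>
#include <string>

// Hypothetical builder for (v <arith_op> right_operand) <op> value.
// Only the Equal / NotEqual branches used by the test are sketched.
inline std::function<bool(int64_t)>
make_arith_predicate(const std::string& arith_op, int64_t right_operand,
                     const std::string& op, int64_t value) {
    auto arith = [=](int64_t v) -> int64_t {
        if (arith_op == "Add") return v + right_operand;
        if (arith_op == "Sub") return v - right_operand;
        if (arith_op == "Mul") return v * right_operand;
        if (arith_op == "Div") return v / right_operand;
        if (arith_op == "Mod") return v % right_operand;
        throw std::invalid_argument("unsupported arith_op: " + arith_op);
    };
    if (op == "Equal") {
        return [=](int64_t v) { return arith(v) == value; };
    }
    if (op == "NotEqual") {
        return [=](int64_t v) { return arith(v) != value; };
    }
    throw std::invalid_argument("unsupported op: " + op);
}

// e.g. make_arith_predicate("Add", 4, "Equal", 8)(v) matches the first case: (v + 4) == 8.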
|
||||
|
||||
@ -660,8 +660,8 @@ TEST(Query, FillSegment) {
|
||||
}());
|
||||
segments.emplace_back([&] {
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
// auto indexing = GenIndexing(N, dim, std_vfloat_vec.data());
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
// auto indexing = GenVecIndexing(N, dim, std_vfloat_vec.data());
|
||||
|
||||
// LoadIndexInfo info;
|
||||
// auto field_offset = schema->get_offset(FieldName("fakevec"));
|
||||
|
||||
@ -57,7 +57,7 @@ TEST(Retrieve, AutoID) {
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
@ -107,7 +107,7 @@ TEST(Retrieve, AutoID2) {
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
@ -153,7 +153,7 @@ TEST(Retrieve, NotExist) {
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
@ -236,7 +236,7 @@ TEST(Retrieve, LargeTimestamp) {
|
||||
uint64_t ts_offset = 100;
|
||||
auto dataset = DataGen(schema, N, 42, ts_offset + 1);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
@ -285,7 +285,7 @@ TEST(Retrieve, Delete) {
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
auto i64_col = dataset.get_col<int64_t>(fid_64);
|
||||
|
||||
auto plan = std::make_unique<query::RetrievePlan>(*schema);
|
||||
|
||||
@@ -88,6 +88,18 @@ TYPED_TEST_P(TypedScalarIndexTest, NotIn) {
    }
}

TYPED_TEST_P(TypedScalarIndexTest, Reverse) {
    using T = TypeParam;
    auto dtype = milvus::GetDType<T>();
    auto index_types = GetIndexTypes<T>();
    for (const auto& index_type : index_types) {
        auto index = milvus::scalar::IndexFactory::GetInstance().CreateIndex<T>(index_type);
        auto arr = GenArr<T>(nb);
        index->Build(nb, arr.data());
        assert_reverse<T>(index, arr);
    }
}

TYPED_TEST_P(TypedScalarIndexTest, Range) {
    using T = TypeParam;
    auto dtype = milvus::GetDType<T>();
@@ -123,6 +135,6 @@ TYPED_TEST_P(TypedScalarIndexTest, Codec) {
// TODO: it's easy to overflow for int8_t. Design more reasonable ut.
using ScalarT = ::testing::Types<int8_t, int16_t, int32_t, int64_t, float, double>;

REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, Count, In, NotIn, Range, Codec);
REGISTER_TYPED_TEST_CASE_P(TypedScalarIndexTest, Dummy, Constructor, Count, In, NotIn, Range, Codec, Reverse);

INSTANTIATE_TYPED_TEST_CASE_P(ArithmeticCheck, TypedScalarIndexTest, ScalarT);
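For orientation, the new Reverse case only requires that an index can invert Build's offset-to-value mapping: Reverse_Lookup(offset) must return the value that sat at that offset in the original input. The toy below illustrates the contract; it is a self-contained sketch, not milvus's ScalarIndexSort:

#include <algorithm>
#include <cstddef>
#include <vector>

// Toy index: keeps a build-order copy purely so Reverse_Lookup is trivial,
// and a sorted copy standing in for whatever the real index uses for queries.
template <typename T>
class ToySortedIndex {
 public:
    void
    Build(size_t n, const T* values) {
        original_.assign(values, values + n);       // build-order copy for reverse lookup
        sorted_ = original_;
        std::sort(sorted_.begin(), sorted_.end());  // sorted copy for In/Range-style queries
    }

    T
    Reverse_Lookup(size_t offset) const {
        return original_.at(offset);
    }

 private:
    std::vector<T> original_;
    std::vector<T> sorted_;
};

// After Build(n, arr.data()), Reverse_Lookup(i) == arr[i] for every i < n,
// which is exactly what assert_reverse checks.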
@ -110,7 +110,11 @@ TEST(Sealed, without_predicate) {
|
||||
load_info.index = indexing;
|
||||
load_info.index_params["metric_type"] = "L2";
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, load_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(fake_id);
|
||||
sealed_segment->LoadIndex(load_info);
|
||||
|
||||
sr = sealed_segment->Search(plan.get(), *ph_group, time);
|
||||
|
||||
auto post_result = SearchResultToJson(*sr);
|
||||
@ -202,7 +206,11 @@ TEST(Sealed, with_predicate) {
|
||||
load_info.index = indexing;
|
||||
load_info.index_params["metric_type"] = "L2";
|
||||
|
||||
auto sealed_segment = SealedCreator(schema, dataset, load_info);
|
||||
// load index for vec field, load raw data for scalar field
|
||||
auto sealed_segment = SealedCreator(schema, dataset);
|
||||
sealed_segment->DropFieldData(fake_id);
|
||||
sealed_segment->LoadIndex(load_info);
|
||||
|
||||
sr = sealed_segment->Search(plan.get(), *ph_group, time);
|
||||
|
||||
for (int i = 0; i < num_queries; ++i) {
|
||||
@ -229,7 +237,7 @@ TEST(Sealed, LoadFieldData) {
|
||||
|
||||
auto fakevec = dataset.get_col<float>(fakevec_id);
|
||||
|
||||
auto indexing = GenIndexing(N, dim, fakevec.data());
|
||||
auto indexing = GenVecIndexing(N, dim, fakevec.data());
|
||||
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
std::string dsl = R"({
|
||||
@ -268,7 +276,7 @@ TEST(Sealed, LoadFieldData) {
|
||||
|
||||
ASSERT_ANY_THROW(segment->Search(plan.get(), *ph_group, time));
|
||||
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
segment->DropFieldData(nothing_id);
|
||||
segment->Search(plan.get(), *ph_group, time);
|
||||
|
||||
@ -282,8 +290,8 @@ TEST(Sealed, LoadFieldData) {
|
||||
segment->LoadIndex(vec_info);
|
||||
|
||||
ASSERT_EQ(segment->num_chunk(), 1);
|
||||
ASSERT_EQ(segment->num_chunk_index(double_id), 1);
|
||||
ASSERT_EQ(segment->num_chunk_index(str_id), 1);
|
||||
ASSERT_EQ(segment->num_chunk_index(double_id), 0);
|
||||
ASSERT_EQ(segment->num_chunk_index(str_id), 0);
|
||||
auto chunk_span1 = segment->chunk_data<int64_t>(counter_id, 0);
|
||||
auto chunk_span2 = segment->chunk_data<double>(double_id, 0);
|
||||
auto chunk_span3 = segment->chunk_data<std::string>(str_id, 0);
|
||||
@ -349,7 +357,7 @@ TEST(Sealed, LoadScalarIndex) {
|
||||
|
||||
auto fakevec = dataset.get_col<float>(fakevec_id);
|
||||
|
||||
auto indexing = GenIndexing(N, dim, fakevec.data());
|
||||
auto indexing = GenVecIndexing(N, dim, fakevec.data());
|
||||
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
std::string dsl = R"({
|
||||
@ -386,7 +394,21 @@ TEST(Sealed, LoadScalarIndex) {
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
SealedLoader(dataset, *segment);
|
||||
LoadFieldDataInfo row_id_info;
|
||||
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
auto array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
|
||||
row_id_info.field_data = array.release();
|
||||
row_id_info.row_count = dataset.row_ids_.size();
|
||||
row_id_info.field_id = RowFieldID.get(); // field id for RowId
|
||||
segment->LoadFieldData(row_id_info);
|
||||
|
||||
LoadFieldDataInfo ts_info;
|
||||
FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta);
|
||||
ts_info.field_data = array.release();
|
||||
ts_info.row_count = dataset.timestamps_.size();
|
||||
ts_info.field_id = TimestampFieldID.get();
|
||||
segment->LoadFieldData(ts_info);
|
||||
|
||||
LoadIndexInfo vec_info;
|
||||
vec_info.field_id = fakevec_id.get();
|
||||
@ -477,7 +499,7 @@ TEST(Sealed, Delete) {
|
||||
|
||||
ASSERT_ANY_THROW(segment->Search(plan.get(), *ph_group, time));
|
||||
|
||||
SealedLoader(dataset, *segment);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
|
||||
int64_t row_count = 5;
|
||||
std::vector<idx_t> pks{1, 2, 3, 4, 5};
|
||||
|
||||
@ -87,10 +87,52 @@ TEST_F(StringIndexMarisaTest, NotIn) {
|
||||
|
||||
TEST_F(StringIndexMarisaTest, Range) {
|
||||
auto index = milvus::scalar::CreateStringIndexMarisa();
|
||||
std::vector<std::string> strings(nb);
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
strings[i] = std::to_string(std::rand() % 10);
|
||||
}
|
||||
*str_arr.mutable_data() = {strings.begin(), strings.end()};
|
||||
str_ds = GenDsFromPB(str_arr);
|
||||
index->BuildWithDataset(str_ds);
|
||||
|
||||
ASSERT_ANY_THROW(index->Range("not important", milvus::OpType::LessEqual));
|
||||
ASSERT_ANY_THROW(index->Range("not important", true, "not important", true));
|
||||
{
|
||||
auto bitset = index->Range("0", milvus::OpType::GreaterEqual);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = index->Range("90", milvus::OpType::LessThan);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = index->Range("9", milvus::OpType::LessEqual);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = index->Range("0", true, "9", true);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = index->Range("0", true, "90", false);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(StringIndexMarisaTest, Reverse) {
    auto index_types = GetIndexTypes<std::string>();
    for (const auto& index_type : index_types) {
        auto index = milvus::scalar::IndexFactory::GetInstance().CreateIndex<std::string>(index_type);
        index->BuildWithDataset(str_ds);
        assert_reverse<std::string>(index, strs);
    }
}
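For the string index the reverse direction can ultimately come from the trie itself: marisa maps a stored key id back to its bytes. A standalone sketch of that round trip with the marisa-trie API, shown only to illustrate the primitive; it is not StringIndexMarisa's internal code, and an offset-to-key-id table would still be needed on top of it:

#include <cassert>
#include <string>
#include <marisa.h>

int
main() {
    // Build a small trie.
    marisa::Keyset keyset;
    keyset.push_back("apple");
    keyset.push_back("banana");
    marisa::Trie trie;
    trie.build(keyset);

    // Forward lookup: string -> key id.
    marisa::Agent agent;
    agent.set_query("banana");
    bool found = trie.lookup(agent);
    assert(found);
    auto key_id = agent.key().id();

    // Reverse lookup: key id -> string bytes.
    agent.set_query(key_id);
    trie.reverse_lookup(agent);
    std::string recovered(agent.key().ptr(), agent.key().length());
    assert(recovered == "banana");
    return 0;
}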
TEST_F(StringIndexMarisaTest, PrefixMatch) {
|
||||
@ -126,18 +168,21 @@ TEST_F(StringIndexMarisaTest, Query) {
|
||||
{
|
||||
auto ds = std::make_shared<knowhere::Dataset>();
|
||||
ds->Set<milvus::OpType>(milvus::scalar::OPERATOR_TYPE, milvus::OpType::GreaterEqual);
|
||||
ds->Set<std::string>(milvus::scalar::RANGE_VALUE, "range");
|
||||
ASSERT_ANY_THROW(index->Query(ds));
|
||||
ds->Set<std::string>(milvus::scalar::RANGE_VALUE, "0");
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_EQ(bitset->size(), strs.size());
|
||||
ASSERT_EQ(bitset->count(), strs.size());
|
||||
}
|
||||
|
||||
{
|
||||
auto ds = std::make_shared<knowhere::Dataset>();
|
||||
ds->Set<milvus::OpType>(milvus::scalar::OPERATOR_TYPE, milvus::OpType::Range);
|
||||
ds->Set<std::string>(milvus::scalar::LOWER_BOUND_VALUE, "range");
|
||||
ds->Set<std::string>(milvus::scalar::LOWER_BOUND_VALUE, "0");
|
||||
ds->Set<std::string>(milvus::scalar::UPPER_BOUND_VALUE, "range");
|
||||
ds->Set<bool>(milvus::scalar::LOWER_BOUND_INCLUSIVE, true);
|
||||
ds->Set<bool>(milvus::scalar::UPPER_BOUND_INCLUSIVE, true);
|
||||
ASSERT_ANY_THROW(index->Query(ds));
|
||||
auto bitset = index->Query(ds);
|
||||
ASSERT_TRUE(bitset->any());
|
||||
}
|
||||
|
||||
{
|
||||
@ -154,6 +199,12 @@ TEST_F(StringIndexMarisaTest, Query) {
|
||||
|
||||
TEST_F(StringIndexMarisaTest, Codec) {
|
||||
auto index = milvus::scalar::CreateStringIndexMarisa();
|
||||
std::vector<std::string> strings(nb);
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
strings[i] = std::to_string(std::rand() % 10);
|
||||
}
|
||||
*str_arr.mutable_data() = {strings.begin(), strings.end()};
|
||||
str_ds = GenDsFromPB(str_arr);
|
||||
index->BuildWithDataset(str_ds);
|
||||
|
||||
auto copy_index = milvus::scalar::CreateStringIndexMarisa();
|
||||
@ -164,27 +215,52 @@ TEST_F(StringIndexMarisaTest, Codec) {
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = copy_index->In(strs.size(), strs.data());
|
||||
ASSERT_EQ(bitset->size(), strs.size());
|
||||
auto bitset = copy_index->In(nb, strings.data());
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_TRUE(bitset->any());
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = copy_index->NotIn(strs.size(), strs.data());
|
||||
ASSERT_EQ(bitset->size(), strs.size());
|
||||
auto bitset = copy_index->NotIn(nb, strings.data());
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_TRUE(bitset->none());
|
||||
}
|
||||
|
||||
{
|
||||
ASSERT_ANY_THROW(copy_index->Range("not important", milvus::OpType::LessEqual));
|
||||
ASSERT_ANY_THROW(copy_index->Range("not important", true, "not important", true));
|
||||
auto bitset = copy_index->Range("0", milvus::OpType::GreaterEqual);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
for (size_t i = 0; i < strs.size(); i++) {
|
||||
auto str = strs[i];
|
||||
auto bitset = copy_index->Range("90", milvus::OpType::LessThan);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = copy_index->Range("9", milvus::OpType::LessEqual);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = copy_index->Range("0", true, "9", true);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
auto bitset = copy_index->Range("0", true, "90", false);
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_EQ(bitset->count(), nb);
|
||||
}
|
||||
|
||||
{
|
||||
for (size_t i = 0; i < nb; i++) {
|
||||
auto str = strings[i];
|
||||
auto bitset = copy_index->PrefixMatch(str);
|
||||
ASSERT_EQ(bitset->size(), strs.size());
|
||||
ASSERT_EQ(bitset->size(), nb);
|
||||
ASSERT_TRUE(bitset->test(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,20 @@
using milvus::scalar::ScalarIndexPtr;

namespace {

bool
compare_float(float x, float y, float epsilon = 0.000001f) {
    if (fabs(x - y) < epsilon)
        return true;
    return false;
}
bool
compare_double(double x, double y, double epsilon = 0.000001f) {
    if (fabs(x - y) < epsilon)
        return true;
    return false;
}

template <typename T>
inline void
assert_in(const ScalarIndexPtr<T>& index, const std::vector<T>& arr) {
@@ -74,6 +88,38 @@ assert_range(const ScalarIndexPtr<T>& index, const std::vector<T>& arr) {
    ASSERT_TRUE(bitset5->any());
}

template <typename T>
inline void
assert_reverse(const ScalarIndexPtr<T>& index, const std::vector<T>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_EQ(index->Reverse_Lookup(offset), arr[offset]);
    }
}

template <>
inline void
assert_reverse(const ScalarIndexPtr<float>& index, const std::vector<float>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_TRUE(compare_float(index->Reverse_Lookup(offset), arr[offset]));
    }
}

template <>
inline void
assert_reverse(const ScalarIndexPtr<double>& index, const std::vector<double>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_TRUE(compare_double(index->Reverse_Lookup(offset), arr[offset]));
    }
}

template <>
inline void
assert_reverse(const ScalarIndexPtr<std::string>& index, const std::vector<std::string>& arr) {
    for (size_t offset = 0; offset < arr.size(); ++offset) {
        ASSERT_TRUE(arr[offset].compare(index->Reverse_Lookup(offset)) == 0);
    }
}

template <>
inline void
assert_in(const ScalarIndexPtr<std::string>& index, const std::vector<std::string>& arr) {
@ -15,6 +15,7 @@
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include "google/protobuf/text_format.h"
|
||||
#include <knowhere/index/vector_index/VecIndex.h>
|
||||
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
|
||||
#include <knowhere/index/vector_index/VecIndexFactory.h>
|
||||
@ -97,8 +98,10 @@ struct GeneratedData {
|
||||
break;
|
||||
}
|
||||
case DataType::VARCHAR: {
|
||||
auto src_data = reinterpret_cast<const T*>(target_field_data.scalars().string_data().data().data());
|
||||
std::copy_n(src_data, raw_->num_rows(), ret.data());
|
||||
auto ret_data = reinterpret_cast<std::string*>(ret.data());
|
||||
auto src_data = target_field_data.scalars().string_data().data();
|
||||
std::copy(src_data.begin(), src_data.end(), ret_data);
|
||||
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@ -354,8 +357,7 @@ SearchResultToJson(const SearchResult& sr) {
|
||||
};
|
||||
|
||||
inline void
|
||||
SealedLoader(const GeneratedData& dataset, SegmentSealed& seg) {
|
||||
// TODO
|
||||
SealedLoadFieldData(const GeneratedData& dataset, SegmentSealed& seg) {
|
||||
auto row_count = dataset.row_ids_.size();
|
||||
{
|
||||
LoadFieldDataInfo info;
|
||||
@ -385,15 +387,14 @@ SealedLoader(const GeneratedData& dataset, SegmentSealed& seg) {
|
||||
}
|
||||
|
||||
inline std::unique_ptr<SegmentSealed>
|
||||
SealedCreator(SchemaPtr schema, const GeneratedData& dataset, const LoadIndexInfo& index_info) {
|
||||
SealedCreator(SchemaPtr schema, const GeneratedData& dataset) {
|
||||
auto segment = CreateSealedSegment(schema);
|
||||
SealedLoader(dataset, *segment);
|
||||
segment->LoadIndex(index_info);
|
||||
SealedLoadFieldData(dataset, *segment);
|
||||
return segment;
|
||||
}
|
||||
|
||||
inline knowhere::VecIndexPtr
|
||||
GenIndexing(int64_t N, int64_t dim, const float* vec) {
|
||||
GenVecIndexing(int64_t N, int64_t dim, const float* vec) {
|
||||
// {knowhere::IndexParams::nprobe, 10},
|
||||
auto conf = knowhere::Config{{knowhere::meta::DIM, dim},
|
||||
{knowhere::IndexParams::nlist, 1024},
|
||||
@ -420,4 +421,20 @@ GenScalarIndexing(int64_t N, const T* data) {
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<char>
translate_text_plan_to_binary_plan(const char* text_plan) {
    proto::plan::PlanNode plan_node;
    auto ok = google::protobuf::TextFormat::ParseFromString(text_plan, &plan_node);
    AssertInfo(ok, "Failed to parse");

    std::string binary_plan;
    plan_node.SerializeToString(&binary_plan);

    std::vector<char> ret;
    ret.resize(binary_plan.size());
    std::memcpy(ret.data(), binary_plan.c_str(), binary_plan.size());

    return ret;
}

}  // namespace milvus::segcore
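The helper above is what lets the new expression tests be written as readable text protos: parse the text form, re-serialize to binary, and hand the bytes to the plan parser. The standalone snippet below demonstrates the same text-to-binary step using google::protobuf::Duration as a stand-in message, since proto::plan::PlanNode is not available outside the repo; it illustrates the technique, not the milvus plan schema:

#include <cassert>
#include <string>
#include <vector>

#include <google/protobuf/duration.pb.h>
#include <google/protobuf/text_format.h>

int
main() {
    // Parse a text-format proto (the tests build theirs via boost::format).
    google::protobuf::Duration msg;
    bool ok = google::protobuf::TextFormat::ParseFromString("seconds: 3 nanos: 500", &msg);
    assert(ok);

    // Re-serialize to the binary wire format.
    std::string binary;
    msg.SerializeToString(&binary);

    // The tests then pass such a byte buffer to CreateSearchPlanByExpr.
    std::vector<char> buf(binary.begin(), binary.end());
    assert(!buf.empty());
    return 0;
}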
@@ -828,7 +828,7 @@ func (s *Segment) segmentLoadIndexData(bytesIndex [][]byte, indexInfo *querypb.F
        return err
    }

    log.Info("updateSegmentIndex done", zap.Int64("segmentID", s.ID()))
    log.Info("updateSegmentIndex done", zap.Int64("segmentID", s.ID()), zap.Int64("fieldID", indexInfo.FieldID))

    return nil
}

@@ -253,12 +253,6 @@ func (loader *segmentLoader) loadSegmentInternal(segment *Segment,
                indexInfo:   indexInfo,
            }
            indexedFieldInfos[fieldID] = fieldInfo

            if fieldBinlog.FieldID == pkFieldID {
                // pk field data should always be loaded.
                // segCore need a map (pk data -> offset)
                fieldBinlogs = append(fieldBinlogs, fieldBinlog)
            }
        } else {
            fieldBinlogs = append(fieldBinlogs, fieldBinlog)
        }
@@ -402,7 +396,7 @@ func (loader *segmentLoader) loadIndexedFieldData(segment *Segment, vecFieldInfo
        if err != nil {
            return err
        }
        log.Debug("load vector field's index data done", zap.Int64("segmentID", segment.ID()), zap.Int64("fieldID", fieldID))
        log.Debug("load field's index data done", zap.Int64("segmentID", segment.ID()), zap.Int64("fieldID", fieldID))
    }
    segment.setIndexedFieldInfo(fieldID, fieldInfo)
}