enhance: Utilize search_batch_pks for search_ids of PkTerm (#43751)
Related to #43660

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
parent: b8fe8aed53
commit: b6199acb05
@@ -34,9 +34,13 @@ PhyJsonContainsFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
         return;
     }
     if (expr_->op_ == proto::plan::JSONContainsExpr_JSONOp_ContainsAll) {
-        result = std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, true), TargetBitmap(real_batch_size, true));
+        result = std::make_shared<ColumnVector>(
+            TargetBitmap(real_batch_size, true),
+            TargetBitmap(real_batch_size, true));
     } else {
-        result = std::make_shared<ColumnVector>(TargetBitmap(real_batch_size, false), TargetBitmap(real_batch_size, true));
+        result = std::make_shared<ColumnVector>(
+            TargetBitmap(real_batch_size, false),
+            TargetBitmap(real_batch_size, true));
     }
     MoveCursor();
     return;
@@ -191,8 +191,7 @@ PhyTermFilterExpr::InitPkCacheOffset() {
         }
     }
 
-    auto [uids, seg_offsets] =
-        segment_->search_ids(*id_array, query_timestamp_);
+    auto seg_offsets = segment_->search_ids(*id_array, query_timestamp_);
     cached_bits_.resize(active_count_, false);
     for (const auto& offset : seg_offsets) {
         auto _offset = (int64_t)offset.get();
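The uids half of the old return pair was unused at this call site, which is what motivates dropping the IdArray from search_ids in the hunks below. A minimal sketch of how InitPkCacheOffset consumes the simplified result; the continuation past this hunk is not shown in the diff, so the cached_bits_ assignment is an assumption:

    auto seg_offsets = segment_->search_ids(*id_array, query_timestamp_);
    cached_bits_.resize(active_count_, false);
    for (const auto& offset : seg_offsets) {
        auto _offset = (int64_t)offset.get();
        cached_bits_[_offset] = true;  // assumed: mark rows whose pk matched the term list
    }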
@@ -540,8 +539,7 @@ PhyTermFilterExpr::ExecJsonInVariableByKeyIndex() {
     if (!arg_inited_) {
         arg_set_ = std::make_shared<SetElement<ValueType>>(expr_->vals_);
         if constexpr (std::is_same_v<GetType, double>) {
-            arg_set_float_ =
-                std::make_shared<SetElement<float>>(expr_->vals_);
+            arg_set_float_ = std::make_shared<SetElement<float>>(expr_->vals_);
         }
         arg_inited_ = true;
     }
@@ -916,17 +916,18 @@ ChunkedSegmentSealedImpl::search_pk(const PkType& pk,
 void
 ChunkedSegmentSealedImpl::search_batch_pks(
     const std::vector<PkType>& pks,
-    const Timestamp* timestamps,
+    const std::function<Timestamp(const size_t idx)>& get_timestamp,
     bool include_same_ts,
     const std::function<void(const SegOffset offset, const Timestamp ts)>&
         callback) const {
     // handle unsorted case
     if (!is_sorted_by_pk_) {
         for (size_t i = 0; i < pks.size(); i++) {
-            auto offsets = insert_record_.search_pk(
-                pks[i], timestamps[i], include_same_ts);
+            auto timestamp = get_timestamp(i);
+            auto offsets =
+                insert_record_.search_pk(pks[i], timestamp, include_same_ts);
             for (auto offset : offsets) {
-                callback(offset, timestamps[i]);
+                callback(offset, timestamp);
             }
         }
         return;
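Replacing the raw const Timestamp* with a per-index functor lets one batched implementation serve both callers: deletes, which carry one timestamp per pk, and queries, which evaluate every pk at a single MVCC timestamp. A minimal sketch of the two adapter shapes, assuming timestamps and query_ts are in scope at the call site (these names are illustrative, not from the patch):

    // Delete path: each incoming pk carries its own delete timestamp.
    auto per_pk_ts = [&](const size_t idx) { return timestamps[idx]; };

    // Query path: every pk is evaluated at one mvcc timestamp.
    auto broadcast_ts = [=](const size_t /*idx*/) { return query_ts; };

    // Either lambda converts to the new parameter type:
    // const std::function<Timestamp(const size_t idx)>&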
@@ -956,7 +957,7 @@ ChunkedSegmentSealedImpl::search_batch_pks(
         for (size_t j = 0; j < pks.size(); j++) {
             // get int64 pks
             auto target = std::get<int64_t>(pks[j]);
-            auto timestamp = timestamps[j];
+            auto timestamp = get_timestamp(j);
             auto it = std::lower_bound(
                 src,
                 src + chunk_row_num,
@@ -988,7 +989,7 @@ ChunkedSegmentSealedImpl::search_batch_pks(
         for (size_t j = 0; j < pks.size(); ++j) {
             // get varchar pks
             auto& target = std::get<std::string>(pks[j]);
-            auto timestamp = timestamps[j];
+            auto timestamp = get_timestamp(j);
             auto offset = string_chunk->binary_search_string(target);
             for (; offset != -1 && offset < string_chunk->RowNums() &&
                    string_chunk->operator[](offset) == target;
@@ -1142,7 +1143,11 @@ ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl(
             const Timestamp* timestamps,
             std::function<void(const SegOffset offset, const Timestamp ts)>
                 callback) {
-            this->search_batch_pks(pks, timestamps, false, callback);
+            this->search_batch_pks(
+                pks,
+                [&](const size_t idx) { return timestamps[idx]; },
+                false,
+                callback);
         },
         segment_id) {
     auto mcm = storage::MmapManager::GetInstance().GetMmapChunkManager();
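Capturing the raw timestamps pointer by reference in this adapter is safe because search_batch_pks invokes get_timestamp synchronously, before the enclosing call returns, and nothing stores the functor. A sketch of the same call with the adapter pulled out and named (per_record_ts is a hypothetical name):

    auto per_record_ts = [&](const size_t idx) { return timestamps[idx]; };
    this->search_batch_pks(pks, per_record_ts, /*include_same_ts=*/false, callback);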
@@ -1752,7 +1757,7 @@ ChunkedSegmentSealedImpl::GetFieldDataType(milvus::FieldId field_id) const {
     return field_meta.get_data_type();
 }
 
-std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
+std::vector<SegOffset>
 ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array,
                                      Timestamp timestamp) const {
     auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
@@ -1763,37 +1768,16 @@ ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array,
     std::vector<PkType> pks(ids_size);
     ParsePksFromIDs(pks, data_type, id_array);
 
-    auto res_id_arr = std::make_unique<IdArray>();
     std::vector<SegOffset> res_offsets;
     res_offsets.reserve(pks.size());
-    for (auto& pk : pks) {
-        std::vector<SegOffset> pk_offsets;
-        if (!is_sorted_by_pk_) {
-            pk_offsets = insert_record_.search_pk(pk, timestamp);
-        } else {
-            pk_offsets = search_pk(pk, timestamp);
-        }
-        for (auto offset : pk_offsets) {
-            switch (data_type) {
-                case DataType::INT64: {
-                    res_id_arr->mutable_int_id()->add_data(
-                        std::get<int64_t>(pk));
-                    break;
-                }
-                case DataType::VARCHAR: {
-                    res_id_arr->mutable_str_id()->add_data(
-                        std::get<std::string>(std::move(pk)));
-                    break;
-                }
-                default: {
-                    ThrowInfo(DataTypeInvalid,
-                              fmt::format("unsupported type {}", data_type));
-                }
-            }
+    this->search_batch_pks(
+        pks,
+        [=](const size_t idx) { return timestamp; },
+        true,
+        [&](const SegOffset offset, const Timestamp ts) {
             res_offsets.push_back(offset);
-        }
-    }
-    return {std::move(res_id_arr), std::move(res_offsets)};
+        });
+    return std::move(res_offsets);
 }
 
 SegcoreError
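Read as a whole, the sealed segment's search_ids now reduces to one batched call: the MVCC timestamp is broadcast to every pk through a value-capturing functor, include_same_ts is true (presumably so entries written at exactly the query timestamp stay visible), and hits are collected straight into the offsets vector, skipping both the per-pk sorted/unsorted branch and the unused result IdArray. The new body, reassembled from the added lines above:

    std::vector<SegOffset> res_offsets;
    res_offsets.reserve(pks.size());
    this->search_batch_pks(
        pks,
        [=](const size_t idx) { return timestamp; },  // broadcast the mvcc timestamp
        true,                                         // include_same_ts
        [&](const SegOffset offset, const Timestamp ts) {
            res_offsets.push_back(offset);            // collect every hit offset
        });
    return std::move(res_offsets);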
@@ -207,7 +207,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
     void
     search_batch_pks(
         const std::vector<PkType>& pks,
-        const Timestamp* timestamps,
+        const std::function<Timestamp(const size_t idx)>& get_timestamp,
         bool include_same_ts,
         const std::function<void(const SegOffset offset, const Timestamp ts)>&
             callback) const;
@@ -410,7 +410,7 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
         return system_ready_count_ == 1;
     }
 
-    std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
+    std::vector<SegOffset>
     search_ids(const IdArray& id_array, Timestamp timestamp) const override;
 
     void
@@ -118,7 +118,9 @@ class DeletedRecord {
             }
         }
         search_pk_func_(
-            pks, timestamps, [&](SegOffset offset, Timestamp delete_ts) {
+            pks,
+            timestamps,
+            [&](const SegOffset offset, const Timestamp delete_ts) {
                 auto row_id = offset.get();
                 // if already deleted, no need to add new record
                 if (deleted_mask_.size() > row_id && deleted_mask_[row_id]) {
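search_pk_func_ reports each (offset, delete timestamp) pair back through this callback, and the callback is where duplicate deletes are filtered against deleted_mask_. A sketch of the callback body following the context lines of this hunk; the behaviour past the if is not shown in the diff and is assumed:

    [&](const SegOffset offset, const Timestamp delete_ts) {
        auto row_id = offset.get();
        // if already deleted, no need to add new record
        if (deleted_mask_.size() > row_id && deleted_mask_[row_id]) {
            return;  // assumed: an earlier delete already covers this row
        }
        // assumed: otherwise remember (row_id, delete_ts) for this delete batch
    }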
@@ -1106,7 +1106,7 @@ SegmentGrowingImpl::bulk_subscript(SystemFieldType system_type,
     }
 }
 
-std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
+std::vector<SegOffset>
 SegmentGrowingImpl::search_ids(const IdArray& id_array,
                                Timestamp timestamp) const {
     auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1));
@@ -1117,32 +1117,15 @@ SegmentGrowingImpl::search_ids(const IdArray& id_array,
     std::vector<PkType> pks(ids_size);
     ParsePksFromIDs(pks, data_type, id_array);
 
-    auto res_id_arr = std::make_unique<IdArray>();
     std::vector<SegOffset> res_offsets;
     res_offsets.reserve(pks.size());
     for (auto& pk : pks) {
         auto segOffsets = insert_record_.search_pk(pk, timestamp);
         for (auto offset : segOffsets) {
-            switch (data_type) {
-                case DataType::INT64: {
-                    res_id_arr->mutable_int_id()->add_data(
-                        std::get<int64_t>(pk));
-                    break;
-                }
-                case DataType::VARCHAR: {
-                    res_id_arr->mutable_str_id()->add_data(
-                        std::get<std::string>(std::move(pk)));
-                    break;
-                }
-                default: {
-                    ThrowInfo(DataTypeInvalid,
-                              fmt::format("unsupported type {}", data_type));
-                }
-            }
             res_offsets.push_back(offset);
         }
     }
-    return {std::move(res_id_arr), std::move(res_offsets)};
+    return std::move(res_offsets);
 }
 
 std::string
@@ -340,7 +340,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
                int64_t ins_barrier,
                Timestamp timestamp) const override;
 
-    std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
+    std::vector<SegOffset>
     search_ids(const IdArray& id_array, Timestamp timestamp) const override;
 
     bool
@@ -440,7 +440,14 @@ class SegmentInternalInterface : public SegmentInterface {
     virtual int64_t
     get_active_count(Timestamp ts) const = 0;
 
-    virtual std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
+    /**
+     * search offset by possible pk values and mvcc timestamp
+     *
+     * @param id_array possible pk values
+     * @param timestamp mvcc timestamp
+     * @return all the hit entries in vector of offsets
+     */
+    virtual std::vector<SegOffset>
     search_ids(const IdArray& id_array, Timestamp timestamp) const = 0;
 
     /**
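A sketch of a caller against the documented interface, assuming an INT64 primary key; segment and query_timestamp are hypothetical names, while the IdArray accessor mutable_int_id()->add_data appears elsewhere in this diff:

    auto id_array = std::make_unique<IdArray>();
    for (auto pk : {int64_t(1), int64_t(7), int64_t(42)}) {
        id_array->mutable_int_id()->add_data(pk);  // candidate pk values
    }
    std::vector<SegOffset> offsets =
        segment->search_ids(*id_array, query_timestamp);  // hits visible at this mvcc ts
    for (const auto& hit : offsets) {
        auto row = hit.get();  // row offset inside the segment
    }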
@@ -45,7 +45,8 @@ TEST(DeleteMVCC, common_case) {
         [&insert_record](
             const std::vector<PkType>& pks,
             const Timestamp* timestamps,
-            std::function<void(SegOffset offset, Timestamp ts)> cb) {
+            std::function<void(const SegOffset offset, const Timestamp ts)>
+                cb) {
             for (size_t i = 0; i < pks.size(); ++i) {
                 auto timestamp = timestamps[i];
                 auto offsets = insert_record.search_pk(pks[i], timestamp);
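The remaining test hunks below apply the same mechanical edit: the std::function parameter's arguments become const-qualified to match the updated callback type, so the declaration no longer fits on one line. Reconstructed from this hunk, the provider lambda the tests hand to DeletedRecord reads in full; the loop body past search_pk is not shown and is assumed to forward each hit through cb:

    [&insert_record](
        const std::vector<PkType>& pks,
        const Timestamp* timestamps,
        std::function<void(const SegOffset offset, const Timestamp ts)>
            cb) {
        for (size_t i = 0; i < pks.size(); ++i) {
            auto timestamp = timestamps[i];
            auto offsets = insert_record.search_pk(pks[i], timestamp);
            for (auto offset : offsets) {
                cb(offset, timestamp);  // assumed: report every hit with its delete timestamp
            }
        }
    }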
@@ -170,7 +171,8 @@ TEST(DeleteMVCC, delete_exist_duplicate_pks) {
         [&insert_record](
             const std::vector<PkType>& pks,
             const Timestamp* timestamps,
-            std::function<void(SegOffset offset, Timestamp ts)> cb) {
+            std::function<void(const SegOffset offset, const Timestamp ts)>
+                cb) {
             for (size_t i = 0; i < pks.size(); ++i) {
                 auto timestamp = timestamps[i];
                 auto offsets = insert_record.search_pk(pks[i], timestamp);
@@ -294,7 +296,8 @@ TEST(DeleteMVCC, snapshot) {
         [&insert_record](
            const std::vector<PkType>& pks,
            const Timestamp* timestamps,
-            std::function<void(SegOffset offset, Timestamp ts)> cb) {
+            std::function<void(const SegOffset offset, const Timestamp ts)>
+                cb) {
             for (size_t i = 0; i < pks.size(); ++i) {
                 auto timestamp = timestamps[i];
                 auto offsets = insert_record.search_pk(pks[i], timestamp);
@@ -351,7 +354,8 @@ TEST(DeleteMVCC, insert_after_snapshot) {
         [&insert_record](
             const std::vector<PkType>& pks,
             const Timestamp* timestamps,
-            std::function<void(SegOffset offset, Timestamp ts)> cb) {
+            std::function<void(const SegOffset offset, const Timestamp ts)>
+                cb) {
             for (size_t i = 0; i < pks.size(); ++i) {
                 auto timestamp = timestamps[i];
                 auto offsets = insert_record.search_pk(pks[i], timestamp);
@@ -455,7 +459,8 @@ TEST(DeleteMVCC, perform) {
         [&insert_record](
             const std::vector<PkType>& pks,
             const Timestamp* timestamps,
-            std::function<void(SegOffset offset, Timestamp ts)> cb) {
+            std::function<void(const SegOffset offset, const Timestamp ts)>
+                cb) {
             for (size_t i = 0; i < pks.size(); ++i) {
                 auto timestamp = timestamps[i];
                 auto offsets = insert_record.search_pk(pks[i], timestamp);
@@ -93,7 +93,8 @@ TEST(Util, GetDeleteBitmap) {
         [&insert_record](
             const std::vector<PkType>& pks,
             const Timestamp* timestamps,
-            std::function<void(SegOffset offset, Timestamp ts)> cb) {
+            std::function<void(const SegOffset offset, const Timestamp ts)>
+                cb) {
             for (size_t i = 0; i < pks.size(); ++i) {
                 auto timestamp = timestamps[i];
                 auto offsets = insert_record.search_pk(pks[i], timestamp);