aoiasd ad9a0cae48
enhance: add global analyzer options (#44684)
relate: https://github.com/milvus-io/milvus/issues/43687
Add global analyzer options, avoid having to merge some milvus params
into user's analyzer params.

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
2025-10-28 14:52:10 +08:00

73 lines
2.1 KiB
C++

#pragma once
#include "tantivy-binding.h"
#include "rust-binding.h"
#include "rust-hashmap.h"
#include "tantivy/rust-array.h"
#include "token-stream.h"
#include "log/Log.h"
namespace milvus::tantivy {
// Owning wrapper around an analyzer handle produced by the tantivy binding.
// The handle is released with tantivy_free_analyzer when the wrapper is
// destroyed; copying is disabled via NO_COPY_OR_ASSIGN.
struct Tokenizer {
 public:
    NO_COPY_OR_ASSIGN(Tokenizer);

    // Builds an analyzer from `params` via tantivy_create_analyzer.
    // AssertInfo fails with the binding's error message when creation does
    // not succeed.
    explicit Tokenizer(std::string&& params) {
        // `params` stays alive for the whole call, so its buffer is handed to
        // the binding directly; no owning copy is kept afterwards.
        auto res = RustResultWrapper(tantivy_create_analyzer(params.c_str()));
        AssertInfo(res.result_->success,
                   "Tokenizer creation failed: {}",
                   res.result_->error);
        ptr_ = res.result_->value.ptr._0;
    }

    // Wraps an existing analyzer handle. The handle is freed by the
    // destructor, so the caller must not free it separately.
    explicit Tokenizer(void* _ptr) : ptr_(_ptr) {
    }

    ~Tokenizer() {
        if (ptr_ != nullptr) {
            tantivy_free_analyzer(ptr_);
        }
    }

    // Creates a token stream over `text`. The text is moved into a shared
    // string that is passed to the returned TokenStream together with the
    // stream handle.
    std::unique_ptr<TokenStream>
    CreateTokenStream(std::string&& text) {
        auto shared_text = std::make_shared<std::string>(std::move(text));
        auto token_stream =
            tantivy_create_token_stream(ptr_, shared_text->c_str());
        return std::make_unique<TokenStream>(token_stream, shared_text);
    }

    // Returns a new Tokenizer wrapping the handle returned by
    // tantivy_clone_analyzer for this analyzer.
    std::unique_ptr<Tokenizer>
    Clone() {
        auto cloned = tantivy_clone_analyzer(ptr_);
        return std::make_unique<Tokenizer>(cloned);
    }

    // CreateTokenStreamCopyText will copy the text and then create token
    // stream based on the copy, leaving the caller's string untouched.
    std::unique_ptr<TokenStream>
    CreateTokenStreamCopyText(const std::string& text) {
        return CreateTokenStream(std::string(text));
    }

 private:
    // Analyzer handle from the binding; nullptr means nothing to free.
    void* ptr_{nullptr};
};
// Forwards `params` to tantivy_set_analyzer_options to set the analyzer
// options. AssertInfo fails with the binding's error message when the call
// does not succeed. The caller's string is only read, never modified.
//
// Marked inline: this function is defined in a header, and without inline
// every translation unit including the header would emit its own external
// definition, causing duplicate-symbol link errors.
inline void
set_tokenizer_options(std::string&& params) {
    auto res =
        RustResultWrapper(tantivy_set_analyzer_options(params.c_str()));
    AssertInfo(res.result_->success,
               "Set analyzer option failed: {}",
               res.result_->error);
}
} // namespace milvus::tantivy