Jiquan Long 5ea2454fdf
feat: tantivy tokenizer binding (#35801)
fix: #35800

---------

Signed-off-by: longjiquan <jiquan.long@zilliz.com>
2024-09-01 17:13:03 +08:00

51 lines
1.0 KiB
C++

#pragma once
#include <assert.h>
#include <memory>
#include <string>
#include <utility>
#include "tantivy-binding.h"
#include "rust-binding.h"
namespace milvus::tantivy {
// Owning wrapper around a token stream handle obtained from the tantivy
// binding. The handle is released with `tantivy_free_token_stream` when the
// wrapper is destroyed. Copying and assignment are disabled through
// `NO_COPY_OR_ASSIGN`.
struct TokenStream {
 public:
    NO_COPY_OR_ASSIGN(TokenStream);

    // Stores `ptr` as the stream handle and keeps a reference to `text`.
    // `ptr` must be non-null; this is only checked by `assert`, so the check
    // disappears in builds where assertions are disabled.
    // NOTE(review): presumably `text` is the input being tokenized and is
    // retained so that it outlives the stream - confirm against the caller.
    TokenStream(void* ptr, std::shared_ptr<std::string> text)
        : ptr_(ptr), text_(std::move(text)) {
        assert(ptr != nullptr);
    }

    // Releases the stream handle, if there is one.
    ~TokenStream() {
        if (ptr_ != nullptr) {
            tantivy_free_token_stream(ptr_);
        }
    }

 public:
    // Forwards to `tantivy_token_stream_advance` and returns its result.
    bool
    advance() {
        return tantivy_token_stream_advance(ptr_);
    }

    // Returns the current token copied into a `std::string`.
    // The buffer handed out by the binding is always given back through
    // `free_rust_string`, including when the copy throws. A null buffer
    // yields an empty string instead of being passed to the `std::string`
    // constructor, which would be undefined behaviour.
    std::string
    get_token() {
        const char* token = tantivy_token_stream_get_token(ptr_);
        if (token == nullptr) {
            return std::string();
        }
        // Scoped guard: frees the buffer once the return value has been
        // built, or during unwinding if building it throws.
        auto release = [](const char* p) { free_rust_string(p); };
        std::unique_ptr<const char, decltype(release)> guard(token, release);
        return std::string(token);
    }

    // Note: the returned token must be freed by calling `free_rust_string`.
    // Unlike `get_token`, no copy is made and ownership of the buffer passes
    // to the caller.
    const char*
    get_token_no_copy() {
        return tantivy_token_stream_get_token(ptr_);
    }

 public:
    // Handle passed to the `tantivy_token_stream_*` functions; freed in the
    // destructor.
    void* ptr_;
    // Shared reference to the string supplied at construction.
    std::shared_ptr<std::string> text_;
};
} // namespace milvus::tantivy