Spade A f552ec67dd
fix: support building tantivy index with low version(5) (#40822)
fix: https://github.com/milvus-io/milvus/issues/40823
To solve the problem described in the issue, we have to support building the
tantivy index with a low version
for query nodes that run a low tantivy version.

This PR does two things:
1. refactors the code of IndexWriterWrapper to make it more concise
2. enables IndexWriterWrapper to build the tantivy index using different tantivy
crates

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
2025-04-02 18:46:20 +08:00

58 lines
1.1 KiB
C++

#pragma once
#include <assert.h>
#include <memory>
#include <string>
#include <utility>
#include "tantivy-binding.h"
#include "rust-binding.h"
namespace milvus::tantivy {
// Namespace-local alias for the binding's TantivyToken type.
using Token = TantivyToken;
// Owning wrapper around an opaque token stream handle produced by the tantivy
// binding. The handle is released with `tantivy_free_token_stream` when the
// wrapper is destroyed.
// NOTE(review): NO_COPY_OR_ASSIGN is defined elsewhere; presumably it deletes
// copy construction/assignment so the handle cannot be freed twice - confirm.
struct TokenStream {
    NO_COPY_OR_ASSIGN(TokenStream);

    // Takes over `ptr` (must be non-null; checked by assert when assertions
    // are enabled) and keeps a reference to `text` for the lifetime of the
    // stream.
    // NOTE(review): `text` is presumably the tokenized input that the native
    // stream may still point into - confirm against the caller.
    TokenStream(void* ptr, std::shared_ptr<std::string> text)
        : ptr_(ptr), text_(std::move(text)) {
        assert(ptr_ != nullptr);
    }

    // Frees the underlying native token stream, if any.
    ~TokenStream() {
        if (ptr_ != nullptr) {
            tantivy_free_token_stream(ptr_);
        }
    }

    // Forwards to `tantivy_token_stream_advance` and returns its result.
    // NOTE(review): presumably `true` means a new current token is
    // available - confirm against the binding.
    bool
    advance() {
        return tantivy_token_stream_advance(ptr_);
    }

    // Returns the current token copied into a std::string.
    //
    // The string handed out by the binding is always given back through
    // `free_rust_string`, including when the std::string copy throws (e.g.
    // std::bad_alloc). A null pointer from the binding yields an empty string
    // instead of undefined behavior.
    std::string
    get_token() {
        // Deleter that returns the binding-allocated string to its owner.
        struct RustStringReleaser {
            void
            operator()(const char* s) const {
                free_rust_string(s);
            }
        };

        const std::unique_ptr<const char, RustStringReleaser> token(
            tantivy_token_stream_get_token(ptr_));
        if (token == nullptr) {
            return {};
        }
        return std::string(token.get());
    }

    // Returns the result of `tantivy_token_stream_get_detailed_token` for the
    // current position, unchanged.
    TantivyToken
    get_detailed_token() {
        return tantivy_token_stream_get_detailed_token(ptr_);
    }

    // Note: the returned token must be freed by calling `free_rust_string`.
    const char*
    get_token_no_copy() {
        return tantivy_token_stream_get_token(ptr_);
    }

    // Opaque native token stream handle; freed in the destructor.
    void* ptr_;
    // Shared reference to the text supplied at construction.
    std::shared_ptr<std::string> text_;
};
} // namespace milvus::tantivy