milvus/internal/util/ctokenizer/c_tokenizer_test.go
Jiquan Long 5ea2454fdf
feat: tantivy tokenizer binding (#35801)
fix: #35800

---------

Signed-off-by: longjiquan <jiquan.long@zilliz.com>
2024-09-01 17:13:03 +08:00

40 lines
897 B
Go

package ctokenizer
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
)
// TestTokenizer is a smoke test for the tokenizer binding: for each
// configuration it builds a tokenizer via NewTokenizer, opens a token stream
// over a sample text, and prints every token the stream yields.
//
// The test only checks that NewTokenizer succeeds; the printed tokens are for
// manual inspection and are not compared against expected values.
func TestTokenizer(t *testing.T) {
	cases := []struct {
		name   string
		params map[string]string
		text   string
	}{
		{
			// An empty parameter map selects the default tokenizer.
			name:   "default",
			params: map[string]string{},
			text:   "football, basketball, pingpang",
		},
		{
			name:   "jieba",
			params: map[string]string{"tokenizer": "jieba"},
			text:   "张华考上了北京大学;李萍进了中等技术学校;我在百货公司当售货员:我们都有光明的前途",
		},
	}

	for _, tc := range cases {
		tc := tc // capture per-iteration copy for toolchains older than Go 1.22
		t.Run(tc.name, func(t *testing.T) {
			tokenizer, err := NewTokenizer(tc.params)
			// assert.NoError only records the failure; stop here so we never
			// call methods on a tokenizer that failed to construct.
			if !assert.NoError(t, err) {
				return
			}
			// Deferred inside the subtest so each case releases its
			// resources as soon as it finishes.
			defer tokenizer.Destroy()

			tokenStream := tokenizer.NewTokenStream(tc.text)
			defer tokenStream.Destroy()

			for tokenStream.Advance() {
				fmt.Println(tokenStream.Token())
			}
		})
	}
}