// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include "common/EasyAssert.h" #include "common/Types.h" namespace milvus { using TypeParams = std::map; using TokenizerParams = std::string; TokenizerParams ParseTokenizerParams(const TypeParams& params); class FieldMeta { public: static const FieldMeta RowIdMeta; FieldMeta(const FieldMeta&) = default; FieldMeta(FieldMeta&&) = default; FieldMeta& operator=(const FieldMeta&) = delete; FieldMeta& operator=(FieldMeta&&) = default; FieldMeta(FieldName name, FieldId id, DataType type, bool nullable) : name_(std::move(name)), id_(id), type_(type), nullable_(nullable) { Assert(!IsVectorDataType(type_)); } FieldMeta(FieldName name, FieldId id, DataType type, int64_t max_length, bool nullable) : name_(std::move(name)), id_(id), type_(type), nullable_(nullable), string_info_(StringInfo{max_length}) { Assert(IsStringDataType(type_)); } FieldMeta(FieldName name, FieldId id, DataType type, int64_t max_length, bool nullable, bool enable_match, bool enable_analyzer, std::map& params) : name_(std::move(name)), id_(id), type_(type), nullable_(nullable), string_info_(StringInfo{ max_length, enable_match, enable_analyzer, std::move(params)}) { Assert(IsStringDataType(type_)); } FieldMeta(FieldName name, FieldId id, DataType type, DataType element_type, bool nullable) : name_(std::move(name)), id_(id), type_(type), element_type_(element_type), nullable_(nullable) { Assert(IsArrayDataType(type_)); } // pass in any value for dim for sparse vector is ok as it'll never be used: // get_dim() not allowed to be invoked on a sparse vector field. FieldMeta(FieldName name, FieldId id, DataType type, int64_t dim, std::optional metric_type, bool nullable) : name_(std::move(name)), id_(id), type_(type), nullable_(nullable), vector_info_(VectorInfo{dim, std::move(metric_type)}) { Assert(IsVectorDataType(type_)); Assert(!nullable); } int64_t get_dim() const { Assert(IsVectorDataType(type_)); // should not attempt to get dim() of a sparse vector from schema. Assert(!IsSparseFloatVectorDataType(type_)); Assert(vector_info_.has_value()); return vector_info_->dim_; } int64_t get_max_len() const { Assert(IsStringDataType(type_)); Assert(string_info_.has_value()); return string_info_->max_length; } bool enable_match() const; bool enable_analyzer() const; TokenizerParams get_analyzer_params() const; std::optional get_metric_type() const { Assert(IsVectorDataType(type_)); Assert(vector_info_.has_value()); return vector_info_->metric_type_; } const FieldName& get_name() const { return name_; } const FieldId& get_id() const { return id_; } DataType get_data_type() const { return type_; } DataType get_element_type() const { return element_type_; } bool is_vector() const { return IsVectorDataType(type_); } bool is_json() const { return type_ == DataType::JSON; } bool is_string() const { return IsStringDataType(type_); } bool is_nullable() const { return nullable_; } size_t get_sizeof() const { AssertInfo(!IsSparseFloatVectorDataType(type_), "should not attempt to get_sizeof() of a sparse vector from " "schema"); static const size_t ARRAY_SIZE = 128; static const size_t JSON_SIZE = 512; if (is_vector()) { return GetDataTypeSize(type_, get_dim()); } else if (is_string()) { Assert(string_info_.has_value()); return string_info_->max_length; } else if (IsVariableDataType(type_)) { return type_ == DataType::ARRAY ? ARRAY_SIZE : JSON_SIZE; } else { return GetDataTypeSize(type_); } } public: static FieldMeta ParseFrom(const milvus::proto::schema::FieldSchema& schema_proto); private: struct VectorInfo { int64_t dim_; std::optional metric_type_; }; struct StringInfo { int64_t max_length; bool enable_match; bool enable_analyzer; std::map params; }; FieldName name_; FieldId id_; DataType type_ = DataType::NONE; DataType element_type_ = DataType::NONE; bool nullable_; std::optional vector_info_; std::optional string_info_; }; } // namespace milvus