// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef ARROW_STL_H #define ARROW_STL_H #include #include #include #include #include "arrow/builder.h" #include "arrow/compute/api.h" #include "arrow/table.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" namespace arrow { class Schema; namespace stl { /// Traits meta class to map standard C/C++ types to equivalent Arrow types. template struct ConversionTraits {}; #define ARROW_STL_CONVERSION(c_type, ArrowType_) \ template <> \ struct ConversionTraits : public CTypeTraits { \ static Status AppendRow(typename TypeTraits::BuilderType& builder, \ c_type cell) { \ return builder.Append(cell); \ } \ static c_type GetEntry(const typename TypeTraits::ArrayType& array, \ size_t j) { \ return array.Value(j); \ } \ constexpr static bool nullable = false; \ }; ARROW_STL_CONVERSION(bool, BooleanType) ARROW_STL_CONVERSION(int8_t, Int8Type) ARROW_STL_CONVERSION(int16_t, Int16Type) ARROW_STL_CONVERSION(int32_t, Int32Type) ARROW_STL_CONVERSION(int64_t, Int64Type) ARROW_STL_CONVERSION(uint8_t, UInt8Type) ARROW_STL_CONVERSION(uint16_t, UInt16Type) ARROW_STL_CONVERSION(uint32_t, UInt32Type) ARROW_STL_CONVERSION(uint64_t, UInt64Type) ARROW_STL_CONVERSION(float, FloatType) ARROW_STL_CONVERSION(double, DoubleType) template <> struct ConversionTraits : public CTypeTraits { static Status AppendRow(StringBuilder& builder, const std::string& cell) { return builder.Append(cell); } static std::string GetEntry(const StringArray& array, size_t j) { return array.GetString(j); } constexpr static bool nullable = false; }; template struct ConversionTraits> : public CTypeTraits> { static Status AppendRow(ListBuilder& builder, std::vector cell) { using ElementBuilderType = typename TypeTraits< typename ConversionTraits::ArrowType>::BuilderType; ARROW_RETURN_NOT_OK(builder.Append()); ElementBuilderType& value_builder = ::arrow::internal::checked_cast(*builder.value_builder()); for (auto const& value : cell) { ARROW_RETURN_NOT_OK( ConversionTraits::AppendRow(value_builder, value)); } return Status::OK(); } static std::vector GetEntry(const ListArray& array, size_t j) { using ElementArrayType = typename TypeTraits< typename ConversionTraits::ArrowType>::ArrayType; const ElementArrayType& value_array = ::arrow::internal::checked_cast(*array.values()); std::vector vec(array.value_length(j)); for (int64_t i = 0; i < array.value_length(j); i++) { vec[i] = ConversionTraits::GetEntry(value_array, array.value_offset(j) + i); } return vec; } constexpr static bool nullable = false; }; /// Build an arrow::Schema based upon the types defined in a std::tuple-like structure. /// /// While the type information is available at compile-time, we still need to add the /// column names at runtime, thus these methods are not constexpr. template ::value> struct SchemaFromTuple { using Element = typename std::tuple_element::type; // Implementations that take a vector-like object for the column names. /// Recursively build a vector of arrow::Field from the defined types. /// /// In most cases MakeSchema is the better entrypoint for the Schema creation. static std::vector> MakeSchemaRecursion( const std::vector& names) { std::vector> ret = SchemaFromTuple::MakeSchemaRecursion(names); std::shared_ptr type = CTypeTraits::type_singleton(); ret.push_back(field(names[N - 1], type, false /* nullable */)); return ret; } /// Build a Schema from the types of the tuple-like structure passed in as template /// parameter assign the column names at runtime. /// /// An example usage of this API can look like the following: /// /// \code{.cpp} /// using TupleType = std::tuple>; /// std::shared_ptr schema = /// SchemaFromTuple::MakeSchema({"int_column", "list_of_strings_column"}); /// \endcode static std::shared_ptr MakeSchema(const std::vector& names) { return std::make_shared(MakeSchemaRecursion(names)); } // Implementations that take a tuple-like object for the column names. /// Recursively build a vector of arrow::Field from the defined types. /// /// In most cases MakeSchema is the better entrypoint for the Schema creation. template static std::vector> MakeSchemaRecursionT( const NamesTuple& names) { using std::get; std::vector> ret = SchemaFromTuple::MakeSchemaRecursionT(names); std::shared_ptr type = ConversionTraits::type_singleton(); ret.push_back(field(get(names), type, ConversionTraits::nullable)); return ret; } /// Build a Schema from the types of the tuple-like structure passed in as template /// parameter assign the column names at runtime. /// /// An example usage of this API can look like the following: /// /// \code{.cpp} /// using TupleType = std::tuple>; /// std::shared_ptr schema = /// SchemaFromTuple::MakeSchema({"int_column", "list_of_strings_column"}); /// \endcode template static std::shared_ptr MakeSchema(const NamesTuple& names) { return std::make_shared(MakeSchemaRecursionT(names)); } }; template struct SchemaFromTuple { static std::vector> MakeSchemaRecursion( const std::vector& names) { std::vector> ret; ret.reserve(names.size()); return ret; } template static std::vector> MakeSchemaRecursionT( const NamesTuple& names) { std::vector> ret; ret.reserve(std::tuple_size::value); return ret; } }; namespace internal { template ::value> struct CreateBuildersRecursive { static Status Make(MemoryPool* pool, std::vector>* builders) { using Element = typename std::tuple_element::type; std::shared_ptr type = ConversionTraits::type_singleton(); ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &builders->at(N - 1))); return CreateBuildersRecursive::Make(pool, builders); } }; template struct CreateBuildersRecursive { static Status Make(MemoryPool*, std::vector>*) { return Status::OK(); } }; template ::value> struct RowIterator { static Status Append(const std::vector>& builders, const Tuple& row) { using std::get; using Element = typename std::tuple_element::type; using BuilderType = typename TypeTraits::ArrowType>::BuilderType; BuilderType& builder = ::arrow::internal::checked_cast(*builders[N - 1]); ARROW_RETURN_NOT_OK(ConversionTraits::AppendRow(builder, get(row))); return RowIterator::Append(builders, row); } }; template struct RowIterator { static Status Append(const std::vector>& builders, const Tuple& row) { return Status::OK(); } }; template ::value> struct EnsureColumnTypes { static Status Cast(const Table& table, std::shared_ptr* table_owner, const compute::CastOptions& cast_options, compute::FunctionContext* ctx, std::reference_wrapper* result) { using Element = typename std::tuple_element::type; std::shared_ptr expected_type = ConversionTraits::type_singleton(); if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) { compute::Datum casted; ARROW_RETURN_NOT_OK(compute::Cast(ctx, compute::Datum(table.column(N - 1)->data()), expected_type, cast_options, &casted)); std::shared_ptr new_column = std::make_shared( table.schema()->field(N - 1)->WithType(expected_type), casted.chunked_array()); ARROW_RETURN_NOT_OK(table.SetColumn(N - 1, new_column, table_owner)); *result = **table_owner; } return EnsureColumnTypes::Cast(result->get(), table_owner, cast_options, ctx, result); } }; template struct EnsureColumnTypes { static Status Cast(const Table& table, std::shared_ptr
* table_ownder, const compute::CastOptions& cast_options, compute::FunctionContext* ctx, std::reference_wrapper* result) { return Status::OK(); } }; template ::value> struct TupleSetter { static void Fill(const Table& table, Range* rows) { using std::get; using Element = typename std::tuple_element::type; using ArrayType = typename TypeTraits::ArrowType>::ArrayType; auto iter = rows->begin(); const ChunkedArray& chunked_array = *table.column(N - 1)->data(); for (int i = 0; i < chunked_array.num_chunks(); i++) { const ArrayType& array = ::arrow::internal::checked_cast(*chunked_array.chunk(i)); for (int64_t j = 0; j < array.length(); j++) { get(*iter++) = ConversionTraits::GetEntry(array, j); } } return TupleSetter::Fill(table, rows); } }; template struct TupleSetter { static void Fill(const Table& table, Range* rows) {} }; } // namespace internal template Status TableFromTupleRange(MemoryPool* pool, const Range& rows, const std::vector& names, std::shared_ptr
* table) { using row_type = typename std::iterator_traits::value_type; constexpr std::size_t n_columns = std::tuple_size::value; std::shared_ptr schema = SchemaFromTuple::MakeSchema(names); std::vector> builders(n_columns); ARROW_RETURN_NOT_OK(internal::CreateBuildersRecursive::Make(pool, &builders)); for (auto const& row : rows) { ARROW_RETURN_NOT_OK(internal::RowIterator::Append(builders, row)); } std::vector> arrays; for (auto const& builder : builders) { std::shared_ptr array; ARROW_RETURN_NOT_OK(builder->Finish(&array)); arrays.emplace_back(array); } *table = Table::Make(schema, arrays); return Status::OK(); } template Status TupleRangeFromTable(const Table& table, const compute::CastOptions& cast_options, compute::FunctionContext* ctx, Range* rows) { using row_type = typename std::decay::type; constexpr std::size_t n_columns = std::tuple_size::value; if (table.schema()->num_fields() != n_columns) { std::stringstream ss; ss << "Number of columns in the table does not match the width of the target: "; ss << table.schema()->num_fields() << " != " << n_columns; return Status::Invalid(ss.str()); } // TODO: Use std::size with C++17 if (rows->size() != static_cast(table.num_rows())) { std::stringstream ss; ss << "Number of rows in the table does not match the size of the target: "; ss << table.num_rows() << " != " << rows->size(); return Status::Invalid(ss.str()); } // Check that all columns have the correct type, otherwise cast them. std::shared_ptr
table_owner; std::reference_wrapper current_table(table); ARROW_RETURN_NOT_OK(internal::EnsureColumnTypes::Cast( table, &table_owner, cast_options, ctx, ¤t_table)); internal::TupleSetter::Fill(current_table.get(), rows); return Status::OK(); } } // namespace stl } // namespace arrow #endif // ARROW_STL_H