sparrow-ipc 0.2.0
Loading...
Searching...
No Matches
flatbuffer_utils.hpp
Go to the documentation of this file.
1#pragma once
2#include <flatbuffers/flatbuffers.h>
3#include <Message_generated.h>
4
5#include <sparrow/c_interface.hpp>
6#include <sparrow/record_batch.hpp>
7
8#include "File_generated.h"
10#include "sparrow_ipc/utils.hpp"
11
12namespace sparrow_ipc
13{
14 // Creates a Flatbuffers Decimal type from a format string.
15 // The format string is expected to be in the format "d:precision,scale".
// `builder` is taken by non-const reference; `bitWidth` is passed alongside the
// format string (presumably the storage width of the decimal - confirm in the
// implementation).
// Returns the flatbuf::Type enumerator paired with a flatbuffers::Offset<void>.
// The function-name line (source line 17) was dropped from this listing; it is
// restored here from the symbol index further down the page.
16 [[nodiscard]] std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
17 get_flatbuffer_decimal_type(
18 flatbuffers::FlatBufferBuilder& builder,
19 std::string_view format_str,
20 const int32_t bitWidth
21 );
22
23 // Creates a Flatbuffers type from a format string.
24 // Maps a sparrow data type (described by format_str) to the corresponding Flatbuffers type.
// Returns the flatbuf::Type enumerator paired with a flatbuffers::Offset<void>.
25 [[nodiscard]] std::pair<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>>
26 get_flatbuffer_type(flatbuffers::FlatBufferBuilder& builder, std::string_view format_str);
27
// Creates a FlatBuffers vector of KeyValue pairs from ArrowSchema metadata.
// `builder` is taken by non-const reference; the result is an offset to a
// vector of KeyValue offsets.
44 [[nodiscard]] flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<org::apache::arrow::flatbuf::KeyValue>>>
45 create_metadata(flatbuffers::FlatBufferBuilder& builder, const ArrowSchema& arrow_schema);
46
// Creates a FlatBuffer Field object from an ArrowSchema.
// `name_override` defaults to std::nullopt; presumably, when set, it replaces
// the name stored in the schema - confirm against the implementation.
// Returns the offset of the created Field.
68 [[nodiscard]] ::flatbuffers::Offset<org::apache::arrow::flatbuf::Field> create_field(
69 flatbuffers::FlatBufferBuilder& builder,
70 const ArrowSchema& arrow_schema,
71 std::optional<std::string_view> name_override = std::nullopt
72 );
73
// Creates a FlatBuffers vector of Field objects from a record batch.
// Returns an offset to a vector of Field offsets.
94 [[nodiscard]] ::flatbuffers::Offset<
95 ::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
96 create_children(flatbuffers::FlatBufferBuilder& builder, const sparrow::record_batch& record_batch);
97
98
// Overload of create_children taking an ArrowSchema instead of a record batch.
// Returns the same offset-to-vector-of-Field type as the record-batch overload.
// NOTE(review): presumably one Field is produced per child of arrow_schema - confirm.
116 [[nodiscard]] ::flatbuffers::Offset<
117 ::flatbuffers::Vector<::flatbuffers::Offset<org::apache::arrow::flatbuf::Field>>>
118 create_children(flatbuffers::FlatBufferBuilder& builder, const ArrowSchema& arrow_schema);
119
// Creates a FlatBuffer builder containing a serialized Arrow schema message.
// The builder is returned by value.
136 [[nodiscard]] flatbuffers::FlatBufferBuilder
137 get_schema_message_builder(const sparrow::record_batch& record_batch);
138
// Recursively fills a vector of FieldNode objects from an arrow_proxy and its children.
// `nodes` is an in/out parameter taken by non-const reference.
// The `void fill_fieldnodes(` line (source line 155) was dropped from this
// listing; it is restored here from the symbol index further down the page.
155 void fill_fieldnodes(
156 const sparrow::arrow_proxy& arrow_proxy,
157 std::vector<org::apache::arrow::flatbuf::FieldNode>& nodes
158 );
159
// Creates a vector of Apache Arrow FieldNode objects from a record batch.
171 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::FieldNode>
172 create_fieldnodes(const sparrow::record_batch& record_batch);
173
174 namespace details
175 {
// Appends one flatbuf::Buffer entry per buffer of `arrow_proxy`, then does the
// same for each of its children (depth-first, parent buffers first).
//
// arrow_proxy      array whose buffers() and children() are walked.
// flatbuf_buffers  output vector; an (offset, size) entry is appended per buffer.
// offset           running offset, updated in place; after each buffer it is
//                  advanced by utils::align_to_8(size).
// get_buffer_size  callable invoked with each buffer; its result is stored as
//                  the entry's size.
//
// The `void fill_buffers_impl(` line (source line 177) was dropped from this
// listing; it is restored here from the symbol index further down the page.
176 template <typename Func>
177 void fill_buffers_impl(
178 const sparrow::arrow_proxy& arrow_proxy,
179 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
180 int64_t& offset,
181 Func&& get_buffer_size
182 )
183 {
184 const auto& buffers = arrow_proxy.buffers();
185 for (const auto& buffer : buffers)
186 {
187 int64_t size = get_buffer_size(buffer);
// The entry records the unpadded size, while the running offset advances by
// the align_to_8 result (presumably size rounded up to a multiple of 8 - confirm in utils).
188 flatbuf_buffers.emplace_back(offset, size);
189 offset += utils::align_to_8(size);
190 }
// Children share the same output vector and running offset.
191 for (const auto& child : arrow_proxy.children())
192 {
193 fill_buffers_impl(child, flatbuf_buffers, offset, get_buffer_size);
194 }
195 }
196
// Builds the list of flatbuf::Buffer entries for every column of a record batch.
//
// record_batch       batch whose columns() are visited in order.
// fill_buffers_func  callable invoked as (arrow_proxy, buffers, offset) for each
//                    column; it receives the shared output vector and running offset.
//
// Returns the vector after all columns have been visited.
197 template <typename Func>
198 std::vector<org::apache::arrow::flatbuf::Buffer>
199 get_buffers_impl(const sparrow::record_batch& record_batch, Func&& fill_buffers_func)
200 {
201 std::vector<org::apache::arrow::flatbuf::Buffer> buffers;
// A single offset, starting at 0, is threaded through all columns.
202 int64_t offset = 0;
203 for (const auto& column : record_batch.columns())
204 {
205 const auto& arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(column);
206 fill_buffers_func(arrow_proxy, buffers, offset);
207 }
208 return buffers;
209 }
210 } // namespace details
211
// Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow proxy.
// `flatbuf_buffers` and `offset` are in/out parameters taken by non-const reference.
// The `void fill_buffers(` line (source line 229) was dropped from this
// listing; it is restored here from the symbol index further down the page.
229 void fill_buffers(
230 const sparrow::arrow_proxy& arrow_proxy,
231 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
232 int64_t& offset
233 );
234
// Extracts buffer information from a record batch for serialization.
249 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer>
250 get_buffers(const sparrow::record_batch& record_batch);
251
// Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
// `flatbuf_compressed_buffers`, `offset` and `cache` are taken by non-const reference.
// The `void fill_compressed_buffers(` line (source line 265) was dropped from
// this listing; it is restored here from the symbol index further down the page.
265 void fill_compressed_buffers(
266 const sparrow::arrow_proxy& arrow_proxy,
267 std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_compressed_buffers,
268 int64_t& offset,
269 const CompressionType compression_type,
270 CompressionCache& cache
271 );
272
// Retrieves metadata describing the layout of compressed buffers within a record batch.
// `cache` is taken by non-const reference.
287 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer> get_compressed_buffers(
288 const sparrow::record_batch& record_batch,
289 const CompressionType compression_type,
290 CompressionCache& cache
291 );
292
// Calculates the total aligned size in bytes of all buffers in an Arrow array structure.
// `compression` and `cache` are optional and both default to std::nullopt.
307 [[nodiscard]] int64_t calculate_body_size(
308 const sparrow::arrow_proxy& arrow_proxy,
309 std::optional<CompressionType> compression = std::nullopt,
310 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
311 );
312
// Overload of calculate_body_size taking a whole record batch.
// `compression` and `cache` are optional and both default to std::nullopt.
// NOTE(review): presumably the sum of the per-column body sizes - confirm.
327 [[nodiscard]] int64_t calculate_body_size(
328 const sparrow::record_batch& record_batch,
329 std::optional<CompressionType> compression = std::nullopt,
330 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
331 );
332
// Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
// `compression` and `cache` are optional and both default to std::nullopt.
// The builder is returned by value.
352 [[nodiscard]] flatbuffers::FlatBufferBuilder get_record_batch_message_builder(
353 const sparrow::record_batch& record_batch,
354 std::optional<CompressionType> compression = std::nullopt,
355 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt
356 );
357
358 // Helper function to extract and parse the footer from Arrow IPC file data.
// Takes a read-only byte span and returns a pointer to a const flatbuf::Footer.
// NOTE(review): the pointer presumably refers into file_data, so the span's
// storage would have to outlive it - confirm against the implementation.
359 [[nodiscard]] SPARROW_IPC_API const org::apache::arrow::flatbuf::Footer* get_footer_from_file_data(std::span<const uint8_t> file_data);
360}
#define SPARROW_IPC_API
Definition config.hpp:12
std::vector< org::apache::arrow::flatbuf::Buffer > get_buffers_impl(const sparrow::record_batch &record_batch, Func &&fill_buffers_func)
void fill_buffers_impl(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_buffers, int64_t &offset, Func &&get_buffer_size)
SPARROW_IPC_API size_t align_to_8(const size_t n)
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< org::apache::arrow::flatbuf::KeyValue > > > create_metadata(flatbuffers::FlatBufferBuilder &builder, const ArrowSchema &arrow_schema)
Creates a FlatBuffers vector of KeyValue pairs from ArrowSchema metadata.
::flatbuffers::Offset< org::apache::arrow::flatbuf::Field > create_field(flatbuffers::FlatBufferBuilder &builder, const ArrowSchema &arrow_schema, std::optional< std::string_view > name_override=std::nullopt)
Creates a FlatBuffer Field object from an ArrowSchema.
std::vector< org::apache::arrow::flatbuf::Buffer > get_buffers(const sparrow::record_batch &record_batch)
Extracts buffer information from a record batch for serialization.
void fill_fieldnodes(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::FieldNode > &nodes)
Recursively fills a vector of FieldNode objects from an arrow_proxy and its children.
flatbuffers::FlatBufferBuilder get_record_batch_message_builder(const sparrow::record_batch &record_batch, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
int64_t calculate_body_size(const sparrow::arrow_proxy &arrow_proxy, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total aligned size in bytes of all buffers in an Arrow array structure.
void fill_compressed_buffers(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_compressed_buffers, int64_t &offset, const CompressionType compression_type, CompressionCache &cache)
Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
SPARROW_IPC_API const org::apache::arrow::flatbuf::Footer * get_footer_from_file_data(std::span< const uint8_t > file_data)
std::vector< org::apache::arrow::flatbuf::Buffer > get_compressed_buffers(const sparrow::record_batch &record_batch, const CompressionType compression_type, CompressionCache &cache)
Retrieves metadata describing the layout of compressed buffers within a record batch.
std::pair< org::apache::arrow::flatbuf::Type, flatbuffers::Offset< void > > get_flatbuffer_decimal_type(flatbuffers::FlatBufferBuilder &builder, std::string_view format_str, const int32_t bitWidth)
flatbuffers::FlatBufferBuilder get_schema_message_builder(const sparrow::record_batch &record_batch)
Creates a FlatBuffer builder containing a serialized Arrow schema message.
void fill_buffers(const sparrow::arrow_proxy &arrow_proxy, std::vector< org::apache::arrow::flatbuf::Buffer > &flatbuf_buffers, int64_t &offset)
Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow proxy.
std::pair< org::apache::arrow::flatbuf::Type, flatbuffers::Offset< void > > get_flatbuffer_type(flatbuffers::FlatBufferBuilder &builder, std::string_view format_str)
std::vector< org::apache::arrow::flatbuf::FieldNode > create_fieldnodes(const sparrow::record_batch &record_batch)
Creates a vector of Apache Arrow FieldNode objects from a record batch.
::flatbuffers::Offset< ::flatbuffers::Vector<::flatbuffers::Offset< org::apache::arrow::flatbuf::Field > > > create_children(flatbuffers::FlatBufferBuilder &builder, const sparrow::record_batch &record_batch)
Creates a FlatBuffers vector of Field objects from a record batch.