sparrow-ipc 0.2.0
Loading...
Searching...
No Matches
serialize_utils.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <ranges>
4#include <vector>
5
6#include <sparrow/record_batch.hpp>
7
11#include "sparrow_ipc/utils.hpp"
12
13namespace sparrow_ipc
14{
26 serialize_schema_message(const sparrow::record_batch& record_batch, any_output_stream& stream);
27
41 [[nodiscard]] SPARROW_IPC_API std::size_t
42 calculate_schema_message_size(const sparrow::record_batch& record_batch);
43
61 [[nodiscard]] SPARROW_IPC_API std::size_t
62 calculate_record_batch_message_size(const sparrow::record_batch& record_batch,
63 std::optional<CompressionType> compression = std::nullopt,
64 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt);
65
80 template <std::ranges::input_range R>
81 requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>
82 [[nodiscard]] std::size_t calculate_total_serialized_size(const R& record_batches,
83 std::optional<CompressionType> compression = std::nullopt,
84 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt)
85 {
86 if (record_batches.empty())
87 {
88 return 0;
89 }
90
91 if (!utils::check_record_batches_consistency(record_batches))
92 {
93 throw std::invalid_argument("Record batches have inconsistent schemas");
94 }
95
96 // Calculate schema message size (only once)
97 auto it = std::ranges::begin(record_batches);
98 std::size_t total_size = calculate_schema_message_size(*it);
99
100 // Calculate record batch message sizes
101 for (const auto& record_batch : record_batches)
102 {
103 total_size += calculate_record_batch_message_size(record_batch, compression, cache);
104 }
105
106 return total_size;
107 }
108
128 SPARROW_IPC_API void fill_body(const sparrow::arrow_proxy& arrow_proxy, any_output_stream& stream,
129 std::optional<CompressionType> compression = std::nullopt,
130 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt);
131
145 SPARROW_IPC_API void generate_body(const sparrow::record_batch& record_batch, any_output_stream& stream,
146 std::optional<CompressionType> compression = std::nullopt,
147 std::optional<std::reference_wrapper<CompressionCache>> cache = std::nullopt);
148
149 SPARROW_IPC_API std::vector<sparrow::data_type> get_column_dtypes(const sparrow::record_batch& rb);
150}
Type-erased wrapper for any stream-like object.
#define SPARROW_IPC_API
Definition config.hpp:12
bool check_record_batches_consistency(const R &record_batches)
Checks if all record batches in a collection have consistent structure.
Definition utils.hpp:33
SPARROW_IPC_API void generate_body(const sparrow::record_batch &record_batch, any_output_stream &stream, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Generates a serialized body from a record batch.
SPARROW_IPC_API void fill_body(const sparrow::arrow_proxy &arrow_proxy, any_output_stream &stream, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Fills the body vector with serialized data from an arrow proxy and its children.
SPARROW_IPC_API void serialize_schema_message(const sparrow::record_batch &record_batch, any_output_stream &stream)
Serializes a schema message for a record batch into a byte buffer.
SPARROW_IPC_API std::size_t calculate_record_batch_message_size(const sparrow::record_batch &record_batch, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total serialized size of a record batch message.
std::size_t calculate_total_serialized_size(const R &record_batches, std::optional< CompressionType > compression=std::nullopt, std::optional< std::reference_wrapper< CompressionCache > > cache=std::nullopt)
Calculates the total serialized size for a collection of record batches.
SPARROW_IPC_API std::size_t calculate_schema_message_size(const sparrow::record_batch &record_batch)
Calculates the total serialized size of a schema message.
SPARROW_IPC_API std::vector< sparrow::data_type > get_column_dtypes(const sparrow::record_batch &rb)