9 #include <sparrow/record_batch.hpp>
58 void write(
const sparrow::record_batch& rb);
72 template <std::ranges::input_range R>
73 requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>
74 void write(
const R& record_batches);
112 template <std::ranges::input_range R>
113 requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>
128 bool m_schema_received{
false};
129 std::vector<sparrow::data_type> m_dtypes;
132 std::optional<CompressionType> m_compression;
137 template <std::ranges::input_range R>
138 requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>
143 throw std::runtime_error(
"Cannot append record batches to a serializer that has been ended");
146 m_pstream->
reserve((m_schema_received ? 0 : 1) + m_pstream->size() + record_batches.size());
148 if (!m_schema_received)
150 m_schema_received =
true;
152 std::vector<uint8_t> schema_buffer;
156 m_pstream->write(std::move(schema_buffer));
159 for (
const auto& rb : record_batches)
163 throw std::invalid_argument(
"Record batch schema does not match serializer schema");
165 std::vector<uint8_t> buffer;
170 m_pstream->write(std::move(buffer));
180 template <std::ranges::input_range R>
181 requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch>
184 write(record_batches);
Type-erased wrapper for any stream-like object.
void write(const sparrow::record_batch &rb)
Writes a single record batch to the chunked stream.
chunk_serializer & operator<<(const sparrow::record_batch &rb)
chunk_serializer(chunked_memory_output_stream< std::vector< std::vector< uint8_t > > > &stream, std::optional< CompressionType > compression=std::nullopt)
Constructs a chunk serializer with a reference to a chunked memory output stream.
void end()
Finalizes the chunk serialization by writing an end-of-stream marker.
An output stream that writes data into separate memory chunks.
void reserve(std::size_t size)
Reserves capacity in the chunk container.
An output stream that writes data to a contiguous memory buffer.
SPARROW_IPC_API serialized_record_batch_info serialize_record_batch(const sparrow::record_batch &record_batch, any_output_stream &stream, std::optional< CompressionType > compression, std::optional< std::reference_wrapper< CompressionCache > > cache)
Serializes a record batch into a binary format following the Arrow IPC specification.
SPARROW_IPC_API void serialize_schema_message(const sparrow::record_batch &record_batch, any_output_stream &stream)
Serializes a schema message for a record batch into a byte buffer.
SPARROW_IPC_API std::vector< sparrow::data_type > get_column_dtypes(const sparrow::record_batch &rb)