|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
#include <cstdint> |
|
#include <memory> |
|
#include <optional> |
|
#include <string> |
|
|
|
#include "parquet/size_statistics.h" |
|
#include "parquet/statistics.h" |
|
#include "parquet/types.h" |
|
|
|
namespace parquet { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Page { |
|
public: |
|
Page(const std::shared_ptr<Buffer>& buffer, PageType::type type) |
|
: buffer_(buffer), type_(type) {} |
|
|
|
PageType::type type() const { return type_; } |
|
|
|
std::shared_ptr<Buffer> buffer() const { return buffer_; } |
|
|
|
|
|
const uint8_t* data() const { return buffer_->data(); } |
|
|
|
|
|
int32_t size() const { return static_cast<int32_t>(buffer_->size()); } |
|
|
|
private: |
|
std::shared_ptr<Buffer> buffer_; |
|
PageType::type type_; |
|
}; |
|
|
|
|
|
class DataPage : public Page { |
|
public: |
|
int32_t num_values() const { return num_values_; } |
|
Encoding::type encoding() const { return encoding_; } |
|
int64_t uncompressed_size() const { return uncompressed_size_; } |
|
const EncodedStatistics& statistics() const { return statistics_; } |
|
|
|
|
|
|
|
std::optional<int64_t> first_row_index() const { return first_row_index_; } |
|
const SizeStatistics& size_statistics() const { return size_statistics_; } |
|
|
|
virtual ~DataPage() = default; |
|
|
|
protected: |
|
DataPage(PageType::type type, const std::shared_ptr<Buffer>& buffer, int32_t num_values, |
|
Encoding::type encoding, int64_t uncompressed_size, |
|
EncodedStatistics statistics, std::optional<int64_t> first_row_index, |
|
SizeStatistics size_statistics) |
|
: Page(buffer, type), |
|
num_values_(num_values), |
|
encoding_(encoding), |
|
uncompressed_size_(uncompressed_size), |
|
statistics_(std::move(statistics)), |
|
first_row_index_(std::move(first_row_index)), |
|
size_statistics_(std::move(size_statistics)) {} |
|
|
|
int32_t num_values_; |
|
Encoding::type encoding_; |
|
int64_t uncompressed_size_; |
|
EncodedStatistics statistics_; |
|
|
|
std::optional<int64_t> first_row_index_; |
|
SizeStatistics size_statistics_; |
|
}; |
|
|
|
class DataPageV1 : public DataPage { |
|
public: |
|
DataPageV1(const std::shared_ptr<Buffer>& buffer, int32_t num_values, |
|
Encoding::type encoding, Encoding::type definition_level_encoding, |
|
Encoding::type repetition_level_encoding, int64_t uncompressed_size, |
|
EncodedStatistics statistics = EncodedStatistics(), |
|
std::optional<int64_t> first_row_index = std::nullopt, |
|
SizeStatistics size_statistics = SizeStatistics()) |
|
: DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size, |
|
std::move(statistics), std::move(first_row_index), |
|
std::move(size_statistics)), |
|
definition_level_encoding_(definition_level_encoding), |
|
repetition_level_encoding_(repetition_level_encoding) {} |
|
|
|
Encoding::type repetition_level_encoding() const { return repetition_level_encoding_; } |
|
|
|
Encoding::type definition_level_encoding() const { return definition_level_encoding_; } |
|
|
|
private: |
|
Encoding::type definition_level_encoding_; |
|
Encoding::type repetition_level_encoding_; |
|
}; |
|
|
|
class DataPageV2 : public DataPage { |
|
public: |
|
DataPageV2(const std::shared_ptr<Buffer>& buffer, int32_t num_values, int32_t num_nulls, |
|
int32_t num_rows, Encoding::type encoding, |
|
int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length, |
|
int64_t uncompressed_size, bool is_compressed = false, |
|
EncodedStatistics statistics = EncodedStatistics(), |
|
std::optional<int64_t> first_row_index = std::nullopt, |
|
SizeStatistics size_statistics = SizeStatistics()) |
|
: DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size, |
|
std::move(statistics), std::move(first_row_index), |
|
std::move(size_statistics)), |
|
num_nulls_(num_nulls), |
|
num_rows_(num_rows), |
|
definition_levels_byte_length_(definition_levels_byte_length), |
|
repetition_levels_byte_length_(repetition_levels_byte_length), |
|
is_compressed_(is_compressed) {} |
|
|
|
int32_t num_nulls() const { return num_nulls_; } |
|
|
|
int32_t num_rows() const { return num_rows_; } |
|
|
|
int32_t definition_levels_byte_length() const { return definition_levels_byte_length_; } |
|
|
|
int32_t repetition_levels_byte_length() const { return repetition_levels_byte_length_; } |
|
|
|
bool is_compressed() const { return is_compressed_; } |
|
|
|
private: |
|
int32_t num_nulls_; |
|
int32_t num_rows_; |
|
int32_t definition_levels_byte_length_; |
|
int32_t repetition_levels_byte_length_; |
|
bool is_compressed_; |
|
}; |
|
|
|
class DictionaryPage : public Page { |
|
public: |
|
DictionaryPage(const std::shared_ptr<Buffer>& buffer, int32_t num_values, |
|
Encoding::type encoding, bool is_sorted = false) |
|
: Page(buffer, PageType::DICTIONARY_PAGE), |
|
num_values_(num_values), |
|
encoding_(encoding), |
|
is_sorted_(is_sorted) {} |
|
|
|
int32_t num_values() const { return num_values_; } |
|
|
|
Encoding::type encoding() const { return encoding_; } |
|
|
|
bool is_sorted() const { return is_sorted_; } |
|
|
|
private: |
|
int32_t num_values_; |
|
Encoding::type encoding_; |
|
bool is_sorted_; |
|
}; |
|
|
|
} |
|
|