File size: 16,680 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 |
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "parquet/exception.h"
#include "parquet/schema.h"
#include "parquet/types.h"
namespace parquet {
/// Cipher used by FileEncryptionProperties::Builder unless algorithm() selects
/// a different one.
static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm{
    ParquetCipher::AES_GCM_V1};
/// Upper bound on the AAD metadata length.
/// NOTE(review): the unit (presumably bytes) is not shown in this header -- confirm.
static constexpr int32_t kMaximalAadMetadataLength{256};
/// Initial value of FileEncryptionProperties::Builder's encrypted-footer flag.
static constexpr bool kDefaultEncryptedFooter{true};
/// Initial value of FileDecryptionProperties::Builder's plaintext footer
/// integrity check flag.
static constexpr bool kDefaultCheckSignature{true};
/// Initial value of FileDecryptionProperties::Builder's "plaintext files
/// allowed" flag.
static constexpr bool kDefaultAllowPlaintextFiles{false};
/// Length of the file-unique part of the AAD.
/// NOTE(review): the unit (presumably bytes) is not shown in this header -- confirm.
static constexpr int32_t kAadFileUniqueLength{8};
// Forward declarations: the map aliases below only need the class names.
class ColumnDecryptionProperties;
class ColumnEncryptionProperties;

/// Shared ColumnDecryptionProperties objects, keyed by a column path string.
using ColumnPathToDecryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>;

/// Shared ColumnEncryptionProperties objects, keyed by a column path string.
using ColumnPathToEncryptionPropertiesMap =
    std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>;
/// Abstract interface that maps key metadata to a key.
class PARQUET_EXPORT DecryptionKeyRetriever {
 public:
  /// \param key_metadata key metadata used to look up the key
  /// \return the key, as a string
  virtual std::string GetKey(const std::string& key_metadata) = 0;

  virtual ~DecryptionKeyRetriever() = default;
};
/// Simple key retriever that holds its keys in a map indexed by a uint32_t key id.
/// Both member functions are defined out of line.
class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever {
 public:
  /// DecryptionKeyRetriever implementation.
  /// NOTE(review): presumably decodes an integer key id from `key_metadata` and
  /// looks it up in the map -- confirm against the definition.
  std::string GetKey(const std::string& key_metadata) override;

  /// NOTE(review): presumably associates `key` with `key_id` in the map --
  /// confirm against the definition.
  void PutKey(uint32_t key_id, const std::string& key);

 private:
  std::map<uint32_t, std::string> key_map_;
};
/// Simple key retriever that holds its keys in a map indexed by a string key id.
/// Both member functions are defined out of line.
class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever {
 public:
  /// DecryptionKeyRetriever implementation.
  /// NOTE(review): presumably treats `key_metadata` as the key id and looks it
  /// up in the map -- confirm against the definition.
  std::string GetKey(const std::string& key_metadata) override;

  /// NOTE(review): presumably associates `key` with `key_id` in the map --
  /// confirm against the definition.
  void PutKey(const std::string& key_id, const std::string& key);

 private:
  std::map<std::string, std::string> key_map_;
};
/// ParquetException subclass constructed from a column path.
/// NOTE(review): presumably raised when a column cannot be read because its
/// key is not available -- confirm at the throw sites.
class PARQUET_EXPORT HiddenColumnException : public ParquetException {
 public:
  /// \param columnPath column path; its C string is forwarded to the
  ///   ParquetException constructor
  explicit HiddenColumnException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
/// ParquetException subclass constructed from a column path.
/// NOTE(review): presumably raised when access to a column key is refused --
/// confirm at the throw sites.
class PARQUET_EXPORT KeyAccessDeniedException : public ParquetException {
 public:
  /// \param columnPath column path; its C string is forwarded to the
  ///   ParquetException constructor
  explicit KeyAccessDeniedException(const std::string& columnPath)
      : ParquetException(columnPath.c_str()) {}
};
/// View the characters of a string as unsigned 8-bit values.
///
/// \param str string whose storage is viewed; the returned pointer refers to
///   that storage, so it is only usable while `str` is alive and unmodified
/// \return NULLPTR when `str` is empty, otherwise a pointer to the first
///   character of `str` reinterpreted as `const uint8_t*`
inline const uint8_t* str2bytes(const std::string& str) {
  if (str.empty()) return NULLPTR;
  // data() on a const string is already a pointer-to-const, so the cast can go
  // straight to const uint8_t* without stripping constness first.
  return reinterpret_cast<const uint8_t*>(str.data());
}
/// View the characters of a string as a span of unsigned 8-bit values.
///
/// \param str string whose storage is viewed; the span refers to that storage,
///   so it is only usable while `str` is alive and unmodified
/// \return a default-constructed span when `str` is empty, otherwise a span
///   over `str.size()` elements starting at `str.data()`
inline ::arrow::util::span<const uint8_t> str2span(const std::string& str) {
  using ByteSpan = ::arrow::util::span<const uint8_t>;
  if (str.empty()) return ByteSpan{};
  const auto* first_byte = reinterpret_cast<const uint8_t*>(str.data());
  return ByteSpan{first_byte, str.size()};
}
/// Encryption settings of a single column: its path, whether it is encrypted,
/// whether the footer key is used, and the column key with its key metadata.
/// Fully populated instances are created through the nested Builder.
class PARQUET_EXPORT ColumnEncryptionProperties {
 public:
  /// Collects the settings of one encrypted column and creates the properties.
  class PARQUET_EXPORT Builder {
   public:
    /// Convenience builder for encrypted columns.
    /// \param name column path as a string
    explicit Builder(const std::string& name) : Builder(name, true) {}

    /// Convenience builder for encrypted columns.
    /// \param path column path; it is dereferenced unconditionally and
    ///   therefore must not be null
    explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
        : Builder(path->ToDotString(), true) {}

    /// Set a column-specific key.
    /// If key is not set on an encrypted column, the column will
    /// be encrypted with the footer key.
    /// The key length must be either 16, 24 or 32 bytes.
    /// Caller is responsible for wiping out the input key array.
    Builder* key(std::string column_key);

    /// Set a key retrieval metadata.
    /// Use either key_metadata() or key_id(), not both.
    Builder* key_metadata(const std::string& key_metadata);

    /// A convenience function to set key metadata using a string id.
    /// key_id will be converted to metadata (UTF-8 array).
    /// Use either key_metadata() or key_id(), not both.
    Builder* key_id(const std::string& key_id);

    /// Create the properties object from the values collected so far.
    std::shared_ptr<ColumnEncryptionProperties> build() {
      // The target constructor is private, so std::make_shared cannot reach it.
      return std::shared_ptr<ColumnEncryptionProperties>(
          new ColumnEncryptionProperties(encrypted_, column_path_, key_, key_metadata_));
    }

   private:
    const std::string column_path_;
    bool encrypted_;
    std::string key_;
    std::string key_metadata_;

    // Taken by const reference: the path is copied once, into column_path_.
    Builder(const std::string& path, bool encrypted)
        : column_path_(path), encrypted_(encrypted) {}
  };

  /// \return a copy of the column path
  std::string column_path() const { return column_path_; }
  /// \return the stored "encrypted" flag
  bool is_encrypted() const { return encrypted_; }
  /// \return the stored "encrypted with footer key" flag
  bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; }
  /// \return a copy of the column key
  std::string key() const { return key_; }
  /// \return a copy of the key metadata
  std::string key_metadata() const { return key_metadata_; }

  /// Creates an object with empty strings and both flags set to false.
  ColumnEncryptionProperties() = default;
  ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default;
  ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default;

  /// Clears the key string.
  /// NOTE(review): std::string::clear() is not guaranteed to overwrite the key
  /// bytes in memory.
  ~ColumnEncryptionProperties() { key_.clear(); }

 private:
  const std::string column_path_;
  // Explicit initializers: under default-initialization the defaulted public
  // constructor would otherwise leave both flags indeterminate. `false` is the
  // value that value-initialization already produced, so callers relying on
  // that see no change.
  bool encrypted_ = false;
  bool encrypted_with_footer_key_ = false;
  std::string key_;
  std::string key_metadata_;

  // Defined out of line; used by Builder::build().
  explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path,
                                      const std::string& key,
                                      const std::string& key_metadata);
};
/// Explicit decryption key for a single column.
///
/// This class is only required for setting explicit column decryption keys -
/// to override key retriever (or to provide keys when key metadata and/or
/// key retriever are not available).
class PARQUET_EXPORT ColumnDecryptionProperties {
 public:
  /// Collects the column path and key, then creates the properties.
  class PARQUET_EXPORT Builder {
   public:
    /// \param name column path as a string
    explicit Builder(const std::string& name) : column_path_(name) {}

    /// \param path column path; it is dereferenced unconditionally and
    ///   therefore must not be null
    explicit Builder(const std::shared_ptr<schema::ColumnPath>& path)
        : Builder(path->ToDotString()) {}

    /// Set an explicit column key. If applied on a file that contains
    /// key metadata for this column the metadata will be ignored,
    /// the column will be decrypted with this key.
    /// The key length must be either 16, 24 or 32 bytes.
    Builder* key(const std::string& key);

    /// Create the properties object; defined out of line.
    std::shared_ptr<ColumnDecryptionProperties> build();

   private:
    const std::string column_path_;
    std::string key_;
  };

  ColumnDecryptionProperties() = default;
  ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default;
  ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default;

  /// Clears the key string.
  ~ColumnDecryptionProperties() { key_.clear(); }

  /// \return a copy of the column path
  std::string column_path() const { return column_path_; }

  /// \return a copy of the column key
  std::string key() const { return key_; }

 private:
  const std::string column_path_;
  std::string key_;

  // Defined out of line.
  explicit ColumnDecryptionProperties(const std::string& column_path,
                                      const std::string& key);
};
/// Callback interface for checking the AAD prefix of a file.
class PARQUET_EXPORT AADPrefixVerifier {
 public:
  /// Verifies identity (AAD Prefix) of an individual file,
  /// or of a file collection in a data set.
  /// Throws an exception if an AAD prefix is wrong.
  /// In a data set, AAD Prefixes should be collected,
  /// and then checked for missing files.
  ///
  /// \param aad_prefix the AAD prefix to check
  virtual void Verify(const std::string& aad_prefix) = 0;

  virtual ~AADPrefixVerifier() = default;
};
/// Decryption settings for reading a file: explicit footer and column keys,
/// a key retriever, the AAD prefix with its verifier, and the flags that
/// control plaintext footer checking and plaintext file handling.
/// Instances are created through the nested Builder.
class PARQUET_EXPORT FileDecryptionProperties {
 public:
  /// Collects the decryption settings and creates the properties object.
  class PARQUET_EXPORT Builder {
   public:
    /// Starts from kDefaultCheckSignature and kDefaultAllowPlaintextFiles.
    Builder()
        : check_plaintext_footer_integrity_(kDefaultCheckSignature),
          plaintext_files_allowed_(kDefaultAllowPlaintextFiles) {}

    /// Set an explicit footer key. If applied on a file that contains
    /// footer key metadata, the metadata will be ignored and the footer
    /// will be decrypted/verified with this key.
    /// If no explicit key is set, the footer key will be fetched from the
    /// key retriever.
    /// With explicit keys or AAD prefix, a new properties object must be
    /// created for each encrypted file.
    /// Explicit encryption keys (footer and column) are cloned.
    /// Upon completion of file reading, the cloned encryption keys in the
    /// properties will be wiped out (array values set to 0).
    /// NOTE(review): the destructor visible in this header only calls clear()
    /// on the footer key -- confirm where the wiping happens.
    /// Caller is responsible for wiping out the input key array.
    ///
    /// \param footer_key key whose length must be either 16, 24 or 32 bytes
    Builder* footer_key(const std::string footer_key);

    /// Set explicit column keys (decryption properties).
    /// It is also possible to set a key retriever on this property object.
    /// Upon file decryption, availability of explicit keys is checked before
    /// invocation of the retriever callback.
    /// If an explicit key is available for a footer or a column,
    /// its key metadata will be ignored.
    Builder* column_keys(
        const ColumnPathToDecryptionPropertiesMap& column_decryption_properties);

    /// Set a key retriever callback. It is also possible to
    /// set explicit footer or column keys on this file property object.
    /// Upon file decryption, availability of explicit keys is checked before
    /// invocation of the retriever callback.
    /// If an explicit key is available for a footer or a column,
    /// its key metadata will be ignored.
    Builder* key_retriever(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever);

    /// Skip integrity verification of plaintext footers.
    /// If not called, integrity of plaintext footers will be checked at runtime,
    /// and an exception will be thrown in the following situations:
    /// - footer signing key is not available
    ///   (not passed, or not found by key retriever)
    /// - footer content and signature don't match
    Builder* disable_footer_signature_verification() {
      check_plaintext_footer_integrity_ = false;
      return this;
    }

    /// Explicitly supply the file AAD prefix.
    /// Required when a prefix was used for file encryption but is not stored
    /// in the file.
    /// If the AAD prefix is stored in the file, it will be compared to the
    /// explicitly supplied value and an exception will be thrown if they differ.
    Builder* aad_prefix(const std::string& aad_prefix);

    /// Set callback for verification of AAD Prefixes stored in file.
    Builder* aad_prefix_verifier(std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier);

    /// By default, reading plaintext (unencrypted) files is not allowed when
    /// using a decryptor - in order to detect files that were not encrypted
    /// by mistake.
    /// Calling this method overrides that default. The caller should then use
    /// a different method to ensure encryption of files with sensitive data.
    Builder* plaintext_files_allowed() {
      plaintext_files_allowed_ = true;
      return this;
    }

    /// Create the properties object from the values collected so far.
    std::shared_ptr<FileDecryptionProperties> build() {
      std::shared_ptr<FileDecryptionProperties> properties(new FileDecryptionProperties(
          footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_,
          aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_));
      return properties;
    }

   private:
    std::string footer_key_;
    std::string aad_prefix_;
    std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
    ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
    std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
    bool check_plaintext_footer_integrity_;
    bool plaintext_files_allowed_;
  };

  /// Clears the footer key string.
  ~FileDecryptionProperties() { footer_key_.clear(); }

  /// Key for the column at `column_path`; defined out of line.
  std::string column_key(const std::string& column_path) const;

  /// \return a copy of the footer key
  std::string footer_key() const { return footer_key_; }

  /// \return a copy of the AAD prefix
  std::string aad_prefix() const { return aad_prefix_; }

  /// \return the key retriever pointer as stored
  const std::shared_ptr<DecryptionKeyRetriever>& key_retriever() const {
    return key_retriever_;
  }

  /// \return whether plaintext footer integrity is to be checked
  bool check_plaintext_footer_integrity() const {
    return check_plaintext_footer_integrity_;
  }

  /// \return whether reading plaintext files is allowed
  bool plaintext_files_allowed() const { return plaintext_files_allowed_; }

  /// \return the AAD prefix verifier pointer as stored
  const std::shared_ptr<AADPrefixVerifier>& aad_prefix_verifier() const {
    return aad_prefix_verifier_;
  }

 private:
  std::string footer_key_;
  std::string aad_prefix_;
  std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier_;
  const std::string empty_string_ = "";
  ColumnPathToDecryptionPropertiesMap column_decryption_properties_;
  std::shared_ptr<DecryptionKeyRetriever> key_retriever_;
  bool check_plaintext_footer_integrity_;
  bool plaintext_files_allowed_;

  // Defined out of line; used by Builder::build().
  FileDecryptionProperties(
      const std::string& footer_key,
      std::shared_ptr<DecryptionKeyRetriever> key_retriever,
      bool check_plaintext_footer_integrity, const std::string& aad_prefix,
      std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
      const ColumnPathToDecryptionPropertiesMap& column_decryption_properties,
      bool plaintext_files_allowed);
};
/// Encryption settings for writing a file: the algorithm, the footer key with
/// its metadata, the footer mode, AAD values and the per-column properties.
/// Instances are created through the nested Builder.
class PARQUET_EXPORT FileEncryptionProperties {
 public:
  /// Collects the encryption settings and creates the properties object.
  class PARQUET_EXPORT Builder {
   public:
    /// Starts from kDefaultEncryptionAlgorithm and kDefaultEncryptedFooter,
    /// with AAD prefix storage switched off.
    /// \param footer_key the footer key
    explicit Builder(const std::string& footer_key)
        : parquet_cipher_(kDefaultEncryptionAlgorithm),
          encrypted_footer_(kDefaultEncryptedFooter),
          footer_key_(footer_key),
          store_aad_prefix_in_file_(false) {}

    /// Create files with plaintext footer.
    /// If not called, the files will be created with encrypted footer (default).
    Builder* set_plaintext_footer() {
      encrypted_footer_ = false;
      return this;
    }

    /// Set encryption algorithm.
    /// If not called, files will be encrypted with AES_GCM_V1 (default).
    Builder* algorithm(ParquetCipher::type parquet_cipher) {
      parquet_cipher_ = parquet_cipher;
      return this;
    }

    /// Set a key retrieval metadata (converted from String).
    /// Use either footer_key_metadata or footer_key_id, not both.
    Builder* footer_key_id(const std::string& key_id);

    /// Set a key retrieval metadata.
    /// Use either footer_key_metadata or footer_key_id, not both.
    Builder* footer_key_metadata(const std::string& footer_key_metadata);

    /// Set the file AAD Prefix.
    Builder* aad_prefix(const std::string& aad_prefix);

    /// Skip storing AAD Prefix in file.
    /// If not called, and if AAD Prefix is set, it will be stored.
    Builder* disable_aad_prefix_storage();

    /// Set the list of encrypted columns and their properties (keys etc).
    /// If not called, all columns will be encrypted with the footer key.
    /// If called, the file columns not in the list will be left unencrypted.
    Builder* encrypted_columns(
        const ColumnPathToEncryptionPropertiesMap& encrypted_columns);

    /// Create the properties object from the values collected so far.
    std::shared_ptr<FileEncryptionProperties> build() {
      std::shared_ptr<FileEncryptionProperties> properties(new FileEncryptionProperties(
          parquet_cipher_, footer_key_, footer_key_metadata_, encrypted_footer_,
          aad_prefix_, store_aad_prefix_in_file_, encrypted_columns_));
      return properties;
    }

   private:
    ParquetCipher::type parquet_cipher_;
    bool encrypted_footer_;
    std::string footer_key_;
    std::string footer_key_metadata_;
    std::string aad_prefix_;
    bool store_aad_prefix_in_file_;
    ColumnPathToEncryptionPropertiesMap encrypted_columns_;
  };

  /// Clears the footer key string.
  ~FileEncryptionProperties() { footer_key_.clear(); }

  /// \return whether the footer is encrypted
  bool encrypted_footer() const { return encrypted_footer_; }

  /// \return a copy of the stored EncryptionAlgorithm
  EncryptionAlgorithm algorithm() const { return algorithm_; }

  /// \return a copy of the footer key
  std::string footer_key() const { return footer_key_; }

  /// \return a copy of the footer key metadata
  std::string footer_key_metadata() const { return footer_key_metadata_; }

  /// \return a copy of the file AAD
  std::string file_aad() const { return file_aad_; }

  /// Encryption properties for the column at `column_path`; defined out of line.
  std::shared_ptr<ColumnEncryptionProperties> column_encryption_properties(
      const std::string& column_path);

  /// \return a copy of the column-path-to-properties map
  ColumnPathToEncryptionPropertiesMap encrypted_columns() const {
    return encrypted_columns_;
  }

 private:
  EncryptionAlgorithm algorithm_;
  std::string footer_key_;
  std::string footer_key_metadata_;
  bool encrypted_footer_;
  std::string file_aad_;
  std::string aad_prefix_;
  bool store_aad_prefix_in_file_;
  ColumnPathToEncryptionPropertiesMap encrypted_columns_;

  // Defined out of line; used by Builder::build().
  FileEncryptionProperties(ParquetCipher::type cipher, const std::string& footer_key,
                           const std::string& footer_key_metadata, bool encrypted_footer,
                           const std::string& aad_prefix, bool store_aad_prefix_in_file,
                           const ColumnPathToEncryptionPropertiesMap& encrypted_columns);
};
} // namespace parquet
|