|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
#include <cstdint> |
|
#include <memory> |
|
#include <string> |
|
#include <unordered_map> |
|
#include <vector> |
|
|
|
#include "arrow/io/interfaces.h" |
|
#include "arrow/util/macros.h" |
|
#include "arrow/util/visibility.h" |
|
|
|
namespace arrow { |
|
|
|
class Buffer; |
|
class MemoryPool; |
|
class Status; |
|
|
|
namespace io { |
|
|
|
class HdfsReadableFile; |
|
class HdfsOutputStream; |
|
|
|
|
|
/// \brief Scoping wrapper for the kind of object a path refers to.
///
/// The unscoped enum is nested inside a struct so that its enumerators are
/// spelled ObjectType::FILE and ObjectType::DIRECTORY and the value type is
/// spelled ObjectType::type.
struct ObjectType {
  enum type { FILE, DIRECTORY };
};
|
|
|
|
|
struct ARROW_EXPORT FileStatistics { |
|
|
|
int64_t size; |
|
ObjectType::type kind; |
|
}; |
|
|
|
class ARROW_EXPORT FileSystem { |
|
public: |
|
virtual ~FileSystem() = default; |
|
|
|
virtual Status MakeDirectory(const std::string& path) = 0; |
|
|
|
virtual Status DeleteDirectory(const std::string& path) = 0; |
|
|
|
virtual Status GetChildren(const std::string& path, |
|
std::vector<std::string>* listing) = 0; |
|
|
|
virtual Status Rename(const std::string& src, const std::string& dst) = 0; |
|
|
|
virtual Status Stat(const std::string& path, FileStatistics* stat) = 0; |
|
}; |
|
|
|
struct HdfsPathInfo { |
|
ObjectType::type kind; |
|
|
|
std::string name; |
|
std::string owner; |
|
std::string group; |
|
|
|
|
|
int64_t size; |
|
int64_t block_size; |
|
|
|
int32_t last_modified_time; |
|
int32_t last_access_time; |
|
|
|
int16_t replication; |
|
int16_t permissions; |
|
}; |
|
|
|
/// \brief Connection settings passed to HadoopFileSystem::Connect.
///
/// The string and map members default to empty. `port` is explicitly
/// defaulted so that a default-constructed config can be read, compared, or
/// copied without touching an indeterminate value.
struct HdfsConnectionConfig {
  /// Host to connect to.
  std::string host;

  /// Port to connect to. Defaults to 0.
  /// NOTE(review): libhdfs is understood to treat port 0 as "use the
  /// configured default" -- confirm before relying on it.
  int port = 0;

  /// User to connect as.
  std::string user;

  /// NOTE(review): presumably a path to a Kerberos ticket cache -- confirm.
  std::string kerb_ticket;

  /// Additional key/value configuration entries.
  std::unordered_map<std::string, std::string> extra_conf;
};
|
|
|
/// \brief FileSystem implementation for HDFS.
///
/// The constructor is private and copy/assignment are disallowed, so the
/// static Connect() function is the only public entry point declared here for
/// obtaining an instance. All state is held behind the private
/// HadoopFileSystemImpl, which is only forward-declared in this header.
///
/// NOTE(review): only declarations are visible here. The per-method
/// descriptions below are derived from names and signatures and should be
/// checked against the implementation.
class ARROW_EXPORT HadoopFileSystem : public FileSystem {
 public:
  ~HadoopFileSystem() override;

  /// \brief Obtain a filesystem for the cluster described by `config`.
  /// \param[in] config connection settings
  /// \param[out] fs receives the filesystem instance
  /// \return Status
  static Status Connect(const HdfsConnectionConfig* config,
                        std::shared_ptr<HadoopFileSystem>* fs);

  /// \brief Create a directory at `path`.
  /// NOTE(review): whether missing parents are created is not visible here.
  Status MakeDirectory(const std::string& path) override;

  /// \brief Delete the object at `path`.
  /// \param[in] path the location to delete
  /// \param[in] recursive defaults to false; presumably also deletes directory
  ///   contents when true -- confirm
  Status Delete(const std::string& path, bool recursive = false);

  /// \brief Delete the directory at `path`.
  Status DeleteDirectory(const std::string& path) override;

  /// \brief Disconnect this filesystem from the cluster.
  Status Disconnect();

  /// \brief Report whether `path` exists.
  /// Returns a plain bool, so failures cannot be distinguished from absence
  /// through this interface.
  bool Exists(const std::string& path);

  /// \brief Retrieve metadata for `path`.
  /// \param[in] path the location to inspect
  /// \param[out] info receives the metadata record
  Status GetPathInfo(const std::string& path, HdfsPathInfo* info);

  /// \brief Retrieve the filesystem capacity.
  /// \param[out] nbytes receives the capacity
  Status GetCapacity(int64_t* nbytes);

  /// \brief Retrieve the amount of space in use.
  /// \param[out] nbytes receives the used amount
  Status GetUsed(int64_t* nbytes);

  /// \brief List the children of `path` as strings.
  Status GetChildren(const std::string& path, std::vector<std::string>* listing) override;

  /// \brief List the children of `path` with full metadata.
  /// \param[in] path the location to list
  /// \param[out] listing receives one HdfsPathInfo per entry
  Status ListDirectory(const std::string& path, std::vector<HdfsPathInfo>* listing);

  /// \brief Retrieve the current working directory.
  /// \param[out] out receives the directory path
  Status GetWorkingDirectory(std::string* out);

  /// \brief Change the owner and/or group of `path`.
  /// \param[in] path the location to modify
  /// \param[in] owner new owner
  /// \param[in] group new group
  /// NOTE(review): both are raw C strings; presumably a null pointer leaves
  /// the corresponding attribute unchanged -- confirm.
  Status Chown(const std::string& path, const char* owner, const char* group);

  /// \brief Change the permission bits of `path`.
  /// \param[in] path the location to modify
  /// \param[in] mode new permissions (presumably a POSIX-style mode -- confirm)
  Status Chmod(const std::string& path, int mode);

  /// \brief Rename `src` to `dst`.
  Status Rename(const std::string& src, const std::string& dst) override;

  /// \brief Copy `src` to `dst`.
  Status Copy(const std::string& src, const std::string& dst);

  /// \brief Move `src` to `dst`.
  Status Move(const std::string& src, const std::string& dst);

  /// \brief Retrieve size and kind information for `path`.
  Status Stat(const std::string& path, FileStatistics* stat) override;

  /// \brief Open `path` for reading.
  ///
  /// Four overloads are provided: with or without an explicit `buffer_size`,
  /// and with or without an explicit `io_context`.
  /// NOTE(review): the defaults used when a parameter is omitted are chosen
  /// by the implementation and are not visible here.
  ///
  /// \param[in] path the file to open
  /// \param[in] buffer_size read buffer size
  /// \param[out] file receives the opened file
  Status OpenReadable(const std::string& path, int32_t buffer_size,
                      std::shared_ptr<HdfsReadableFile>* file);

  /// \brief Open `path` for reading with an explicit buffer size and IOContext.
  Status OpenReadable(const std::string& path, int32_t buffer_size,
                      const io::IOContext& io_context,
                      std::shared_ptr<HdfsReadableFile>* file);

  /// \brief Open `path` for reading with implementation-chosen settings.
  Status OpenReadable(const std::string& path, std::shared_ptr<HdfsReadableFile>* file);

  /// \brief Open `path` for reading with an explicit IOContext.
  Status OpenReadable(const std::string& path, const io::IOContext& io_context,
                      std::shared_ptr<HdfsReadableFile>* file);

  /// \brief Open `path` for writing.
  /// \param[in] path the file to open
  /// \param[in] append selects append mode when true
  /// \param[in] buffer_size write buffer size
  /// \param[in] replication replication factor
  /// \param[in] default_block_size block size
  /// \param[out] file receives the opened stream
  /// NOTE(review): whether 0 selects a default for the numeric parameters is
  /// not visible here -- confirm.
  Status OpenWritable(const std::string& path, bool append, int32_t buffer_size,
                      int16_t replication, int64_t default_block_size,
                      std::shared_ptr<HdfsOutputStream>* file);

  /// \brief Open `path` for writing with implementation-chosen buffer,
  /// replication and block-size settings.
  Status OpenWritable(const std::string& path, bool append,
                      std::shared_ptr<HdfsOutputStream>* file);

 private:
  friend class HdfsReadableFile;
  friend class HdfsOutputStream;

  // Implementation class; only forward-declared here and not exported.
  class ARROW_NO_EXPORT HadoopFileSystemImpl;
  std::unique_ptr<HadoopFileSystemImpl> impl_;

  // Private: instances are obtained through Connect().
  HadoopFileSystem();
  ARROW_DISALLOW_COPY_AND_ASSIGN(HadoopFileSystem);
};
|
|
|
/// \brief RandomAccessFile backed by a file on HDFS.
///
/// The constructor is private and HadoopFileSystem::HadoopFileSystemImpl is a
/// friend, so instances are created by the filesystem implementation (see
/// HadoopFileSystem::OpenReadable) rather than directly by callers.
/// Copy and assignment are disallowed.
class ARROW_EXPORT HdfsReadableFile : public RandomAccessFile {
 public:
  ~HdfsReadableFile() override;

  Status Close() override;

  bool closed() const override;

  // Read / ReadAt each come in two forms: one writes into a caller-provided
  // `out` pointer and returns an int64_t count, the other returns a Buffer.
  // NOTE(review): short-read behaviour is defined by the implementation and
  // is not visible in this header.
  Result<int64_t> Read(int64_t nbytes, void* out) override;
  Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
  Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
  Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;

  Status Seek(int64_t position) override;
  Result<int64_t> Tell() const override;
  Result<int64_t> GetSize() override;

 private:
  explicit HdfsReadableFile(const io::IOContext&);

  // Implementation class; only forward-declared here and not exported.
  class ARROW_NO_EXPORT HdfsReadableFileImpl;
  std::unique_ptr<HdfsReadableFileImpl> impl_;

  // Grants the filesystem implementation access to the private constructor
  // and to impl_.
  friend class HadoopFileSystem::HadoopFileSystemImpl;

  ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsReadableFile);
};
|
|
|
|
|
|
|
/// \brief OutputStream backed by a file on HDFS.
///
/// The constructor is private and HadoopFileSystem::HadoopFileSystemImpl is a
/// friend, so instances are created by the filesystem implementation (see
/// HadoopFileSystem::OpenWritable) rather than directly by callers.
/// Copy and assignment are disallowed.
class ARROW_EXPORT HdfsOutputStream : public OutputStream {
 public:
  ~HdfsOutputStream() override;

  Status Close() override;

  bool closed() const override;

  // Keep the other OutputStream::Write overloads visible; declaring the
  // override below would otherwise hide them.
  using OutputStream::Write;
  Status Write(const void* buffer, int64_t nbytes) override;

  Status Flush() override;

  Result<int64_t> Tell() const override;

 private:
  // Implementation class; only forward-declared here and not exported.
  class ARROW_NO_EXPORT HdfsOutputStreamImpl;
  std::unique_ptr<HdfsOutputStreamImpl> impl_;

  // Grants the filesystem implementation access to the private constructor
  // and to impl_.
  friend class HadoopFileSystem::HadoopFileSystemImpl;

  HdfsOutputStream();

  ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsOutputStream);
};
|
|
|
/// \brief Report through the returned Status whether libhdfs is usable.
/// NOTE(review): presumably this attempts to locate and load the libhdfs
/// shared library and returns a non-OK Status on failure -- confirm against
/// the implementation.
ARROW_EXPORT Status HaveLibHdfs();
|
|
|
} |
|
} |
|
|