|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
#include <array> |
|
#include <cstdint> |
|
#include <cstring> |
|
#include <iosfwd> |
|
#include <limits> |
|
#include <type_traits> |
|
|
|
#include "arrow/util/endian.h" |
|
#include "arrow/util/macros.h" |
|
#include "arrow/util/ubsan.h" |
|
#include "arrow/util/visibility.h" |
|
|
|
namespace arrow { |
|
namespace util { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ARROW_EXPORT Float16 { |
|
public: |
|
Float16() = default; |
|
explicit Float16(float f) : Float16(FromFloat(f)) {} |
|
explicit Float16(double d) : Float16(FromDouble(d)) {} |
|
template <typename T, |
|
typename std::enable_if_t<std::is_convertible_v<T, double>>* = NULLPTR> |
|
explicit Float16(T v) : Float16(static_cast<double>(v)) {} |
|
|
|
|
|
constexpr static Float16 FromBits(uint16_t bits) { return Float16{bits, bool{}}; } |
|
|
|
static Float16 FromFloat(float f); |
|
|
|
static Float16 FromDouble(double d); |
|
|
|
|
|
static Float16 FromBytes(const uint8_t* src) { |
|
return FromBits(SafeLoadAs<uint16_t>(src)); |
|
} |
|
|
|
|
|
static Float16 FromLittleEndian(const uint8_t* src) { |
|
return FromBits(::arrow::bit_util::FromLittleEndian(SafeLoadAs<uint16_t>(src))); |
|
} |
|
|
|
|
|
static Float16 FromBigEndian(const uint8_t* src) { |
|
return FromBits(::arrow::bit_util::FromBigEndian(SafeLoadAs<uint16_t>(src))); |
|
} |
|
|
|
|
|
constexpr uint16_t bits() const { return bits_; } |
|
|
|
|
|
constexpr bool signbit() const { return (bits_ & 0x8000) != 0; } |
|
|
|
|
|
constexpr bool is_nan() const { return (bits_ & 0x7fff) > 0x7c00; } |
|
|
|
constexpr bool is_infinity() const { return (bits_ & 0x7fff) == 0x7c00; } |
|
|
|
constexpr bool is_finite() const { return (bits_ & 0x7c00) != 0x7c00; } |
|
|
|
constexpr bool is_zero() const { return (bits_ & 0x7fff) == 0; } |
|
|
|
|
|
float ToFloat() const; |
|
|
|
double ToDouble() const; |
|
|
|
explicit operator float() const { return ToFloat(); } |
|
explicit operator double() const { return ToDouble(); } |
|
|
|
|
|
void ToBytes(uint8_t* dest) const { std::memcpy(dest, &bits_, sizeof(bits_)); } |
|
|
|
constexpr std::array<uint8_t, 2> ToBytes() const { |
|
#if ARROW_LITTLE_ENDIAN |
|
return ToLittleEndian(); |
|
#else |
|
return ToBigEndian(); |
|
#endif |
|
} |
|
|
|
|
|
void ToLittleEndian(uint8_t* dest) const { |
|
const auto bytes = ToLittleEndian(); |
|
std::memcpy(dest, bytes.data(), bytes.size()); |
|
} |
|
|
|
constexpr std::array<uint8_t, 2> ToLittleEndian() const { |
|
return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)}; |
|
} |
|
|
|
|
|
void ToBigEndian(uint8_t* dest) const { |
|
const auto bytes = ToBigEndian(); |
|
std::memcpy(dest, bytes.data(), bytes.size()); |
|
} |
|
|
|
constexpr std::array<uint8_t, 2> ToBigEndian() const { |
|
return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)}; |
|
} |
|
|
|
constexpr Float16 operator-() const { return FromBits(bits_ ^ 0x8000); } |
|
constexpr Float16 operator+() const { return FromBits(bits_); } |
|
|
|
friend constexpr bool operator==(Float16 lhs, Float16 rhs) { |
|
if (lhs.is_nan() || rhs.is_nan()) return false; |
|
return Float16::CompareEq(lhs, rhs); |
|
} |
|
friend constexpr bool operator!=(Float16 lhs, Float16 rhs) { return !(lhs == rhs); } |
|
|
|
friend constexpr bool operator<(Float16 lhs, Float16 rhs) { |
|
if (lhs.is_nan() || rhs.is_nan()) return false; |
|
return Float16::CompareLt(lhs, rhs); |
|
} |
|
friend constexpr bool operator>(Float16 lhs, Float16 rhs) { return rhs < lhs; } |
|
|
|
friend constexpr bool operator<=(Float16 lhs, Float16 rhs) { |
|
if (lhs.is_nan() || rhs.is_nan()) return false; |
|
return !Float16::CompareLt(rhs, lhs); |
|
} |
|
friend constexpr bool operator>=(Float16 lhs, Float16 rhs) { return rhs <= lhs; } |
|
|
|
ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os, Float16 arg); |
|
|
|
protected: |
|
uint16_t bits_; |
|
|
|
private: |
|
constexpr Float16(uint16_t bits, bool) : bits_(bits) {} |
|
|
|
|
|
static constexpr bool CompareEq(Float16 lhs, Float16 rhs) { |
|
return (lhs.bits() == rhs.bits()) || (lhs.is_zero() && rhs.is_zero()); |
|
} |
|
static constexpr bool CompareLt(Float16 lhs, Float16 rhs) { |
|
if (lhs.signbit()) { |
|
if (rhs.signbit()) { |
|
|
|
return lhs.bits() > rhs.bits(); |
|
} else { |
|
|
|
return !lhs.is_zero() || rhs.bits() != 0; |
|
} |
|
} else if (rhs.signbit()) { |
|
return false; |
|
} else { |
|
|
|
return lhs.bits() < rhs.bits(); |
|
} |
|
} |
|
}; |
|
|
|
static_assert(std::is_trivial_v<Float16>); |
|
|
|
} |
|
} |
|
|
|
|
|
template <> |
|
class std::numeric_limits<arrow::util::Float16> { |
|
using T = arrow::util::Float16; |
|
|
|
public: |
|
static constexpr bool is_specialized = true; |
|
static constexpr bool is_signed = true; |
|
static constexpr bool has_infinity = true; |
|
static constexpr bool has_quiet_NaN = true; |
|
|
|
static constexpr T min() { return T::FromBits(0b0000010000000000); } |
|
static constexpr T max() { return T::FromBits(0b0111101111111111); } |
|
static constexpr T lowest() { return -max(); } |
|
|
|
static constexpr T infinity() { return T::FromBits(0b0111110000000000); } |
|
|
|
static constexpr T quiet_NaN() { return T::FromBits(0b0111111111111111); } |
|
}; |
|
|