#pragma once

#include <cstdint>
#include <cstdlib>

#include <qnnpack/operator.h>

namespace qnnpack {
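// Holds convolution weights and bias pre-packed into the layout the
// QNNPACK convolution kernels consume. The packed buffer is owned by
// this object and released with free() in the destructor, which is why
// the class is non-copyable.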
class PrePackConvWeights final {
 public:
  PrePackConvWeights(
      const pytorch_qnnp_operator_t convolution,
      const uint8_t* kernel_zero_points,
      const uint8_t* kernel,
      const int32_t* bias);

  void* getPackedWeights() const
  {
    return packed_weights_;
  }

  int64_t getOutputChannels() const
  {
    return output_channels_;
  }

  ~PrePackConvWeights()
  {
    if (packed_weights_ != nullptr) {
      free(packed_weights_);
    }
  }

  PrePackConvWeights() = delete;
  PrePackConvWeights(const PrePackConvWeights&) = delete;
  PrePackConvWeights& operator=(const PrePackConvWeights&) = delete;

 private:
  void* packed_weights_ = nullptr;
  int64_t output_channels_;
};
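
// Pre-packs the weight ("B") matrix of a quantized fully-connected
// layer together with its bias and, in the per-channel case, one zero
// point and one requantization scale per output channel. Owns the
// packed buffer, mirroring PrePackConvWeights.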
class PackBMatrix final {
 public:
  PackBMatrix(
      size_t input_channels,
      size_t output_channels,
      const uint8_t* kernel_zero_points,
      const float* requantization_scale,
      const uint8_t* kernel,
      const int32_t* bias);

  // This constructor is to be used for dynamic-mode quantization. In
  // dynamic mode, per-channel quantization is not yet supported, and
  // paying the cost of a memory allocation for per-channel zero points
  // and requantization scales would hurt performance.
  PackBMatrix(
      size_t input_channels,
      size_t output_channels,
      const uint8_t kernel_zero_point,
      const float requantization_scale,
      const uint8_t* kernel,
      const int32_t* bias);

  void* getPackedWeights() const
  {
    return packed_weights_;
  }

  void unpackWeights(
      const uint8_t* kernel_zero_points,
      int8_t* kernel) const;

  size_t getInputChannels() const
  {
    return input_channels_;
  }

  size_t getOutputChannels() const
  {
    return output_channels_;
  }

  ~PackBMatrix()
  {
    if (packed_weights_ != nullptr) {
      free(packed_weights_);
    }
  }

  PackBMatrix() = delete;
  PackBMatrix(const PackBMatrix&) = delete;
  PackBMatrix& operator=(const PackBMatrix&) = delete;

 private:
  void* packed_weights_ = nullptr;
  size_t input_channels_;
  size_t output_channels_;
};
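
// Statically quantized fully-connected (linear) operator on uint8 data.
// The accumulated result is requantized with the given zero points and
// scales and clamped to [output_min, output_max]; packed_weights is the
// buffer produced by PackBMatrix::getPackedWeights().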
enum pytorch_qnnp_status qnnpackLinear(
    const size_t batch_size,
    const size_t input_channels,
    const size_t output_channels,
    const uint8_t input_zero_point,
    const uint8_t* kernel_zero_points,
    const float* requantization_scales,
    const uint8_t output_zero_point,
    const uint8_t output_min,
    const uint8_t output_max,
    const uint8_t* input,
    const size_t input_stride,
    void* packed_weights,
    uint8_t* output,
    const size_t output_stride,
    pthreadpool_t threadpool);
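
// Example (sketch; the shapes, quantization parameters, and buffer
// names below are illustrative, not part of this API):
//
//   qnnpack::PackBMatrix packed_b(
//       input_channels, output_channels,
//       kernel_zero_points,       // one entry per output channel
//       requantization_scales,    // one entry per output channel
//       kernel, bias);
//   qnnpack::qnnpackLinear(
//       batch_size, input_channels, output_channels,
//       input_zero_point, kernel_zero_points, requantization_scales,
//       output_zero_point, /*output_min=*/0, /*output_max=*/255,
//       input, input_stride,
//       packed_b.getPackedWeights(),
//       output, output_stride, threadpool);

// Statically quantized convolution. `convolution` describes the
// operator configuration (kernel size, strides, padding, dilation);
// weights are expected in pre-packed form (see PrePackConvWeights).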
enum pytorch_qnnp_status qnnpackConv(
    const pytorch_qnnp_operator_t convolution,
    void* packed_weights,
    const size_t batch_size,
    const size_t input_depth,
    const size_t input_height,
    const size_t input_width,
    const uint8_t input_zero_point,
    const uint8_t* input,
    const uint8_t* kernel_zero_points,
    const float* requantization_scales,
    const uint8_t output_zero_point,
    const uint8_t output_min,
    const uint8_t output_max,
    uint8_t* output,
    pthreadpool_t threadpool);
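
// Statically quantized deconvolution (transposed convolution); mirrors
// qnnpackConv, driven by a deconvolution operator descriptor.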
enum pytorch_qnnp_status qnnpackDeConv(
    const pytorch_qnnp_operator_t deconvolution,
    void* packed_weights,
    const size_t batch_size,
    const size_t input_height,
    const size_t input_width,
    const uint8_t input_zero_point,
    const uint8_t* input,
    const uint8_t* kernel_zero_points,
    const float* requantization_scales,
    const uint8_t output_zero_point,
    const uint8_t output_min,
    const uint8_t output_max,
    uint8_t* output,
    pthreadpool_t threadpool);
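
// Dynamically quantized fully-connected operator: the input is uint8,
// the accumulator is scaled back to float with dequantization_scales,
// a float bias is added, and the output is written as float32.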
enum pytorch_qnnp_status qnnpackLinearDynamic(
    const size_t batch_size,
    const size_t input_channels,
    const size_t output_channels,
    const uint8_t input_zero_point,
    const uint8_t* kernel_zero_points,
    const float* dequantization_scales,
    const uint8_t* input,
    const size_t input_stride,
    void* packed_weights,
    const float* bias,
    float* output,
    const size_t output_stride,
    pthreadpool_t threadpool);
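
// Example (sketch; values are illustrative, and whether the int32 bias
// may be omitted at packing time in dynamic mode is an assumption):
//
//   qnnpack::PackBMatrix packed_b(
//       input_channels, output_channels,
//       kernel_zero_point,        // single zero point in dynamic mode
//       requantization_scale,     // single scale in dynamic mode
//       kernel,
//       /*bias=*/nullptr);        // assumption: bias is applied in float below
//   qnnpack::qnnpackLinearDynamic(
//       batch_size, input_channels, output_channels,
//       input_zero_point, kernel_zero_points, dequantization_scales,
//       input, input_stride,
//       packed_b.getPackedWeights(),
//       bias,                     // float bias
//       output,                   // float32 output
//       output_stride, threadpool);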
} // namespace qnnpack