#pragma once

#include <cuda.h>
#include <cuda_fp16.h>
#include <fstream>
#include <stdio.h>

#include "custom_cuda_layers.h"
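
// Host-side wrapper that configures and launches the fused attention-softmax
// CUDA kernels (launch_attn_softmax / launch_attn_softmax_backward_v2)
// declared in custom_cuda_layers.h.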
template <typename T>
class Softmax {
public:
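    // Launch geometry and options: the softmax normalizes each row of
    // seq_length attention scores, one row per (batch, head, query) position.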
    struct Config {
        size_t batchSize;
        size_t heads;
        size_t seq_length;
        size_t prob_depth;  // optional probability-buffer depth, exposed via GetProbDepth()
        float temperature;  // currently fixed at 1.0 by the constructor
        bool mem_alloc;

        Config(size_t batch, size_t h, size_t seq, size_t prob_size = 0, bool mem_alloc = false)
            : batchSize(batch),
              heads(h),
              seq_length(seq),
              prob_depth(prob_size),
              temperature(1.0f),
              mem_alloc(mem_alloc)
        {
        }
    };

    Softmax(Config config) : config_(config) {}

    ~Softmax() {}
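
    // Forward pass: computes a masked softmax in place over the attention
    // scores in `vals` (bsz x heads x seq_length x seq_length), applying
    // `attn_mask` on the given stream.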
    void Forward(int bsz, T* vals, const T* attn_mask, cudaStream_t stream)
    {
        launch_attn_softmax<T>(vals, attn_mask, bsz, config_.heads, config_.seq_length, stream);
    }
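
    // Backward pass: given dL/dy in `out_grad` and the saved softmax output y
    // in `soft_out`, overwrites `out_grad` with dL/dx = y * (dL/dy - sum(dL/dy * y)),
    // the standard softmax Jacobian-vector product per row.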
    void Backward(int bsz, T* out_grad, const T* soft_out, cudaStream_t stream)
    {
        launch_attn_softmax_backward_v2<T>(
            out_grad, soft_out, bsz, config_.heads, config_.seq_length, stream);
    }

    inline size_t GetProbDepth() const { return config_.prob_depth; }

    inline size_t GetBatchSize() const { return config_.batchSize; }

    inline size_t GetNumHeads() const { return config_.heads; }

    inline size_t GetSeqLength() const { return config_.seq_length; }
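
    // Lets a single instance be reused across inputs with different sequence lengths.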
    inline void SetSeqLength(size_t seq_len) { config_.seq_length = seq_len; }

private:
    Config config_;
};
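
// Minimal usage sketch, assuming `scores`, `mask`, and `grad` are device
// buffers and `stream` is a valid cudaStream_t (names are illustrative):
//
//   Softmax<float>::Config cfg(batch, num_heads, seq_len);
//   Softmax<float> softmax(cfg);
//   softmax.Forward(batch, scores, mask, stream);   // scores -> probabilities
//   softmax.Backward(batch, grad, scores, stream);  // grad -> input gradient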