all-MiniLM-L6-v2-onnx
This is the ONNX-ported version of the sentence-transformers/all-MiniLM-L6-v2 model, for generating text embeddings.
Model details
Embedding dimension: 384
Max sequence length: 256
It has been exported with the following arguments:
input_names=["input_ids", "attention_mask"],
output_names=["token_embeddings", "sentence_embedding"],
dynamic_axes={
"input_ids": {0: "batch_size", 1: "sequence_length"},
"attention_mask": {0: "batch_size", 1: "sequence_length"},
"token_embeddings": {0: "batch_size", 1: "sequence_length"},
"sentence_embedding": {0: "batch_size"}
Usage:
#include <iostream>
#include <vector>
#include <onnxruntime_cxx_api.h>
// Minimal ONNX Runtime example: run the model on a pre-tokenized input and
// print the pooled sentence embedding.
int main() {
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "embedding");
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);

    // Load the ONNX model
    const char* model_path = "your_model.onnx";
    Ort::Session session(env, model_path, session_options);

    // Input and output names — must match the names the model was exported with.
    const char* input_names[] = {"input_ids", "attention_mask"};
    const char* output_names[] = {"token_embeddings", "sentence_embedding"};

    // Mock input data (batch_size=1, sequence_length=5).
    // Use a proper tokenization library (e.g. tokenizers-cpp) to convert a
    // string into input_ids; these token IDs are just an example.
    std::vector<int64_t> input_ids = {101, 2009, 2003, 1037, 2742};
    std::vector<int64_t> attention_mask = {1, 1, 1, 1, 1};
    std::vector<int64_t> input_shape = {1, 5}; // batch_size=1, seq_len=5

    // Create input tensors over the existing buffers. The CreateTensor overload
    // that wraps user-supplied data takes an OrtMemoryInfo, not an OrtAllocator
    // (the allocator overload allocates its own buffer and takes no data pointer).
    // The tensors do NOT copy: the vectors must outlive them.
    Ort::MemoryInfo memory_info =
        Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_ids_tensor = Ort::Value::CreateTensor<int64_t>(
        memory_info, input_ids.data(), input_ids.size(),
        input_shape.data(), input_shape.size());
    Ort::Value attention_mask_tensor = Ort::Value::CreateTensor<int64_t>(
        memory_info, attention_mask.data(), attention_mask.size(),
        input_shape.data(), input_shape.size());

    std::vector<Ort::Value> input_tensors;
    input_tensors.push_back(std::move(input_ids_tensor));
    input_tensors.push_back(std::move(attention_mask_tensor));

    // Run inference: 2 inputs in, 2 outputs requested back.
    auto output_tensors = session.Run(Ort::RunOptions{nullptr},
                                      input_names, input_tensors.data(), 2,
                                      output_names, 2);

    // output_tensors[1] is "sentence_embedding" — shape [batch_size, 384].
    float* sentence_embedding = output_tensors[1].GetTensorMutableData<float>();
    size_t embedding_size =
        output_tensors[1].GetTensorTypeAndShapeInfo().GetElementCount();

    std::cout << "Sentence Embedding:\n";
    for (size_t i = 0; i < embedding_size; ++i) {
        std::cout << sentence_embedding[i] << " ";
    }
    std::cout << '\n';
    return 0;
}
- Downloads last month
- 5
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
Ask for provider support
Model tree for nsense/all-MiniLM-L6-v2-onnx
Base model
sentence-transformers/all-MiniLM-L6-v2