#!/usr/bin/env bash
# Test script for TEI with the converted ONNX model.
#
# Prints the model directory contents and usage notes for serving the
# quantized Qwen3-Embedding-0.6B INT8 ONNX model with Text Embeddings
# Inference (TEI). Informational only: it does not start TEI itself.
set -euo pipefail

echo "Testing Qwen3-Embedding-0.6B INT8 ONNX with TEI..."
echo "This model is quantized for faster CPU inference"

# The model directory is assumed to be the current working directory.
MODEL_PATH=$(pwd)
echo "Model path: $MODEL_PATH"
echo "Files in model directory:"
ls -la -- "$MODEL_PATH"
echo ""
echo "Expected performance improvement: 2-4x faster on CPU"
echo "Note: There may be a small accuracy drop (1-3%)"
echo ""
echo "To use this model with TEI:"
echo "1. Upload to HuggingFace Hub, or"
echo "2. Mount this directory in your TEI container"
echo "3. Update model-id in porter.yaml to point to this model"
echo ""
echo "For optimal CPU performance, set these environment variables:"
# $(nproc) expands NOW, so the printed commands show concrete values
# for this machine rather than a literal "$(nproc)" for the user to run.
echo "export OMP_NUM_THREADS=$(nproc) # Use all physical cores"
echo "export KMP_AFFINITY=granularity=fine,compact,1,0"
echo "export ORT_THREAD_POOL_SIZE=$(nproc)"