# LocalAI configuration for fast, lightweight models
name: tinyllama
backend: llama
parameters:
  # Model file - will be downloaded automatically
  model: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
  # Generation settings for faster responses
  temperature: 0.1
  top_p: 0.9
  top_k: 10
  repeat_penalty: 1.1
# Performance settings
threads: 4
context_size: 2048
batch: 512
# Speed optimizations
f16: true
low_vram: true
# Prompt templates (TinyLlama uses the Zephyr-style chat format)
template:
  chat: |
    <|system|>
    You are a helpful assistant.</s>
    <|user|>
    {{.Input}}</s>
    <|assistant|>
  completion: |
    {{.Input}}
---
name: qwen2-0.5b
backend: llama
parameters:
  # Model file - will be downloaded automatically
  model: qwen2-0_5b-instruct-q4_k_m.gguf
  # Generation settings for faster responses
  temperature: 0.1
  top_p: 0.9
# Performance settings
threads: 4
context_size: 1024
batch: 256
# Speed optimizations
f16: true
low_vram: true
# Prompt template (Qwen2 uses the ChatML format)
template:
  chat: |
    <|im_start|>system
    You are a helpful assistant.<|im_end|>
    <|im_start|>user
    {{.Input}}<|im_end|>
    <|im_start|>assistant
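# ---------------------------------------------------------------------------
# Usage sketch (comment only, not part of the config): once LocalAI loads
# this file, each "name" above is served as a model id on its
# OpenAI-compatible API. The host and port below assume a default local
# install - adjust them to your deployment.
#
#   curl http://localhost:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{
#       "model": "tinyllama",
#       "messages": [{"role": "user", "content": "Hello!"}],
#       "temperature": 0.1
#     }'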