Spaces:
Sleeping
Sleeping
# LocalAI configuration for a fast, lightweight model (TinyLlama 1.1B Chat).
name: tinyllama
backend: llama

# Prediction options sent to the backend for each request.
parameters:
  # Model file (GGUF, Q4_K_M quantization).
  # NOTE(review): the original comment says this "will be downloaded
  # automatically"; a bare filename is normally looked up in the models
  # directory, so confirm that the deployment actually fetches it.
  model: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
  batch: 512
  # Generation settings for faster responses: low temperature and a small
  # top_k keep sampling narrow.
  temperature: 0.1
  top_p: 0.9
  top_k: 10
  repeat_penalty: 1.1

# Performance settings. These are model-level keys, so they sit beside
# `parameters` rather than inside it.
threads: 4
context_size: 2048

# Speed optimizations.
f16: true
low_vram: true

# Prompt templates. `{{.Input}}` is replaced with the rendered request input.
template:
  # Chat format: system / user / assistant turns, each closed with </s>.
  chat: |
    <|system|>
    You are a helpful assistant.</s>
    <|user|>
    {{.Input}}</s>
    <|assistant|>
  # Plain completion: the prompt is passed through unchanged.
  completion: |
    {{.Input}}
name: qwen2-0.5b | |
backend: llama | |
parameters: | |
model: qwen2-0_5b-instruct-q4_k_m.gguf | |
threads: 4 | |
context_size: 1024 | |
batch: 256 | |
f16: true | |
low_vram: true | |
temperature: 0.1 | |
top_p: 0.9 | |
template: | |
chat: | | |
<|im_start|>system | |
You are a helpful assistant.<|im_end|> | |
<|im_start|>user | |
{{.Input}}<|im_end|> | |
<|im_start|>assistant |