Spaces:

NitinBot001
/

Ollama

Sleeping

Ollama / config.yaml

Create config.yaml

fcb7620 verified 2 months ago

1.01 kB

	# LocalAI Configuration for fast lightweight models
	name: tinyllama
	backend: llama
	parameters:
	# Model file - will be downloaded automatically
	model: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

	# Performance settings
	threads: 4
	context_size: 2048
	batch: 512

	# Speed optimizations
	f16: true
	low_vram: true

	# Generation settings for faster responses
	temperature: 0.1
	top_p: 0.9
	top_k: 10
	repeat_penalty: 1.1

	# Template for chat format
	template:
	chat: \|
	<\|system\|>
	You are a helpful assistant.</s>
	<\|user\|>
	{{.Input}}</s>
	<\|assistant\|>
	completion: \|
	{{.Input}}

	---
	name: qwen2-0.5b
	backend: llama
	parameters:
	model: qwen2-0_5b-instruct-q4_k_m.gguf
	threads: 4
	context_size: 1024
	batch: 256
	f16: true
	low_vram: true
	temperature: 0.1
	top_p: 0.9
	template:
	chat: \|
	<\|im_start\|>system
	You are a helpful assistant.<\|im_end\|>
	<\|im_start\|>user
	{{.Input}}<\|im_end\|>
	<\|im_start\|>assistant