# Sampling parameters passed to the model at generation time.
GENERATION_KWARGS = dict(
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.0,
)

# Model download and load options (GGUF file fetched from the Hugging Face Hub).
LLAMA_MODEL_KWARGS = dict(
    # repo_id='bartowski/google_gemma-3-1b-it-GGUF',
    # filename='google_gemma-3-1b-it-Q8_0.gguf',
    repo_id='bartowski/Qwen_Qwen3-0.6B-GGUF',
    filename='Qwen_Qwen3-0.6B-Q4_K_M.gguf',
    local_dir='model',
    cache_dir='model',
    n_gpu_layers=-1,  # offload all layers to the GPU when one is available
    verbose=True,
    n_ctx=4096,       # context window size in tokens
)

# Toggle for displaying the model's chain-of-thought ("thinking") output.
SHOW_THINKING = False
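
# A minimal usage sketch, not part of the original config: it assumes these
# constants are consumed with llama-cpp-python, where Llama.from_pretrained()
# downloads the GGUF file from the Hugging Face Hub and the generation kwargs
# are forwarded to create_chat_completion(). The helper names (load_model, ask)
# and the SHOW_THINKING handling are illustrative assumptions.
from llama_cpp import Llama


def load_model() -> Llama:
    # from_pretrained() fetches repo_id/filename via huggingface_hub and passes
    # the remaining kwargs (n_gpu_layers, n_ctx, verbose, ...) to Llama().
    return Llama.from_pretrained(**LLAMA_MODEL_KWARGS)


def ask(llm: Llama, prompt: str) -> str:
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        **GENERATION_KWARGS,
    )
    text = response["choices"][0]["message"]["content"]
    if not SHOW_THINKING:
        # Qwen3 models emit a <think>...</think> block before the answer;
        # drop it when thinking output is disabled (assumed behavior).
        text = text.split("</think>")[-1].lstrip()
    return text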