model="wizardLM-7B-GGML/wizardLM-7B.ggml.q5_1.bin" use_mmap="True" use_mlock="True" n_threads="6" n_ctx="1024" n_gpu_layers="40"