model="wizardLM-7B/wizardLM-7B.ggml.q5_0.bin" use_mmap="True" use_mlock="True" n_threads="4"