Andrew Gockenbach committed on
Commit
de23eb7
·
1 Parent(s): c5e182a

Updated tensor parallel size

Browse files
Files changed (1) hide show
  1. app.py +1 -5
app.py CHANGED
@@ -1,8 +1,4 @@
1
  import os
2
- import json
3
- import subprocess
4
- import time
5
- import signal
6
  import atexit
7
  import torch
8
 
@@ -41,7 +37,7 @@ class VllmApiServer:
41
  "--enable-lora",
42
  "--lora-modules", f"adapter={self.adapter_path}",
43
  "--max-lora-rank", "64",
44
- "--tensor-parallel-size", "1"
45
  ]
46
 
47
  print(f"Starting vLLM server with command: {' '.join(cmd)}")
 
1
  import os
 
 
 
 
2
  import atexit
3
  import torch
4
 
 
37
  "--enable-lora",
38
  "--lora-modules", f"adapter={self.adapter_path}",
39
  "--max-lora-rank", "64",
40
+ "--tensor-parallel-size", "4"
41
  ]
42
 
43
  print(f"Starting vLLM server with command: {' '.join(cmd)}")