Coool2 committed
Commit 798378e · Parent: a60285b

Update agent.py

Files changed (1): agent.py (+3 -3)
agent.py CHANGED
@@ -138,7 +138,7 @@ def initialize_models(use_api_mode=False):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            self.model_name, torch_dtype=torch.bfloat16, device_map='auto'
+            self.model_name, torch_dtype=torch.bfloat16, device_map='balanced'
         )
         self._processor = AutoProcessor.from_pretrained(self.model_name)
 
@@ -200,14 +200,14 @@ def initialize_models(use_api_mode=False):
 
     embed_model = HuggingFaceEmbedding(
         model_name="llamaindex/vdr-2b-multi-v1",
-        device="cuda",
+        device="cuda:0",
         trust_remote_code = True)
 
     # Code LLM
     code_llm = HuggingFaceLLM(
         model_name="Qwen/Qwen2.5-Coder-1.5B-Instruct",
         tokenizer_name="Qwen/Qwen2.5-Coder-1.5B-Instruct",
-        device_map="cpu",
+        device_map="cuda:1",
         model_kwargs={"torch_dtype": "auto"},
         generate_kwargs={"do_sample": False}
     )
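For context, here is a minimal sketch of what the affected parts of agent.py look like after this commit, assuming a host with at least two visible GPUs. The wrapper class name, its simplified constructor, and the Qwen2.5-VL checkpoint name are placeholders (the real class and its base live elsewhere in agent.py); everything else mirrors the diff above.

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM


class QwenVLWrapper:  # placeholder name; the real class (and its base) are defined in agent.py
    def __init__(self, model_name="Qwen/Qwen2.5-VL-7B-Instruct", **kwargs):  # checkpoint assumed
        self.model_name = model_name
        # 'balanced' shards the vision-language model evenly across all visible GPUs,
        # whereas the previous 'auto' let accelerate pick the layout on its own.
        self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            self.model_name, torch_dtype=torch.bfloat16, device_map="balanced"
        )
        self._processor = AutoProcessor.from_pretrained(self.model_name)


def initialize_models(use_api_mode=False):
    # Embedding model pinned to the first GPU (was plain "cuda" before).
    embed_model = HuggingFaceEmbedding(
        model_name="llamaindex/vdr-2b-multi-v1",
        device="cuda:0",
        trust_remote_code=True,
    )

    # Code LLM moved from CPU to the second GPU.
    code_llm = HuggingFaceLLM(
        model_name="Qwen/Qwen2.5-Coder-1.5B-Instruct",
        tokenizer_name="Qwen/Qwen2.5-Coder-1.5B-Instruct",
        device_map="cuda:1",
        model_kwargs={"torch_dtype": "auto"},
        generate_kwargs={"do_sample": False},
    )
    return embed_model, code_llm

The net effect appears to be an explicit two-GPU layout: the large vision-language model is spread across both devices, while the embedding model and the coder model are pinned to cuda:0 and cuda:1 respectively, so the coder model no longer runs on CPU.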