Coool2 committed
Commit 8b1644b · 1 Parent(s): 792fede

Update agent.py

Files changed (1):
  1. agent.py +25 -30
agent.py CHANGED
@@ -33,6 +33,7 @@ from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
 from llama_index.tools.arxiv import ArxivToolSpec
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.llms.vllm import Vllm
 
 # Import all required official LlamaIndex Readers
 from llama_index.readers.file import (
@@ -48,7 +49,6 @@ try:
     # Gemini (for API mode)
     from llama_index.llms.gemini import Gemini
     from llama_index.embeddings.gemini import GeminiEmbedding
-    from llama_index_llms_vllm import Vllm
     GEMINI_AVAILABLE = True
 except ImportError:
     GEMINI_AVAILABLE = False
@@ -120,36 +120,31 @@ def initialize_models(use_api_mode=False):
     # Try to use Pixtral 12B with vLLM if available
     pixtral_model = "Qwen/Qwen3-8B"  # Fallback model
     try:
-        if importlib.util.find_spec("llama_index_llms_vllm") is not None:
-            from llama_index_llms_vllm import Vllm
-            # Check if Pixtral 12B is accessible
-            if os.path.exists("/path/to/pixtral-12b") or True:  # Placeholder check
-                pixtral_model = "mistralai/pixtral-12b"
-                print(f"Using Pixtral 12B with vLLM")
-
-                # Custom prompt template for Pixtral model
-                def messages_to_prompt(messages):
-                    prompt = "\n".join([str(x) for x in messages])
-                    return f"<s>[INST] {prompt} [/INST] </s>\n"
-
-                def completion_to_prompt(completion):
-                    return f"<s>[INST] {completion} [/INST] </s>\n"
-
-                proj_llm = Vllm(
-                    model=pixtral_model,
-                    tensor_parallel_size=1,  # Adjust based on available GPUs
-                    max_new_tokens=16000,
-                    messages_to_prompt=messages_to_prompt,
-                    completion_to_prompt=completion_to_prompt,
-                    temperature=0.6,
-                    top_p=0.95,
-                    top_k=20
-                )
-            else:
-                # Use regular Qwen model if Pixtral not found
-                raise ImportError("Pixtral 12B not found")
+        if os.path.exists("/path/to/pixtral-12b") or True:  # Placeholder check
+            pixtral_model = "mistralai/pixtral-12b"
+            print(f"Using Pixtral 12B with vLLM")
+
+            # Custom prompt template for Pixtral model
+            def messages_to_prompt(messages):
+                prompt = "\n".join([str(x) for x in messages])
+                return f"<s>[INST] {prompt} [/INST] </s>\n"
+
+            def completion_to_prompt(completion):
+                return f"<s>[INST] {completion} [/INST] </s>\n"
+
+            proj_llm = Vllm(
+                model=pixtral_model,
+                tensor_parallel_size=1,  # Adjust based on available GPUs
+                max_new_tokens=16000,
+                messages_to_prompt=messages_to_prompt,
+                completion_to_prompt=completion_to_prompt,
+                temperature=0.6,
+                top_p=0.95,
+                top_k=20
+            )
         else:
-            raise ImportError("vLLM not available")
+            # Use regular Qwen model if Pixtral not found
+            raise ImportError("Pixtral 12B not found")
     except (ImportError, Exception) as e:
         print(f"Error loading Pixtral with vLLM: {e}")
         print(f"Falling back to {pixtral_model} with HuggingFace...")
 