qwerty45-uiop committed
Commit dcec7ff · verified · 1 Parent(s): d1f7b63

Update src/streamlit_app.py

Files changed (1): src/streamlit_app.py +33 -72
src/streamlit_app.py CHANGED
@@ -25,7 +25,7 @@ def load_data():
         "src/BITS_INTERNS.xlsx",
         "src/Summer of AI - ICFAI (Responses) (3).xlsx"
     ]
-
+
     combined_df = pd.DataFrame()
     for path in paths:
         try:
@@ -36,39 +36,34 @@ def load_data():
             return None, f"Excel file '{path}' not found. Please upload the file."
         except Exception as e:
            return None, f"Error loading '{path}': {str(e)}"
-
+
     if combined_df.empty:
         return None, "No data found in either file."
-
-    return combined_df, None
+    return combined_df, None

 def extract_numeric_ram(ram) -> Optional[int]:
     if pd.isna(ram):
         return None
-
+
     ram_str = str(ram).lower().replace(" ", "")
-
-    # Handle GB format
+
     gb_match = re.search(r"(\d+(?:\.\d+)?)(?:gb|g)", ram_str)
     if gb_match:
         return int(float(gb_match.group(1)))
-
-    # Handle MB format
+
     mb_match = re.search(r"(\d+)(?:mb|m)", ram_str)
     if mb_match:
         return max(1, int(int(mb_match.group(1)) / 1024))
-
-    # Handle plain numbers
+
     plain_match = re.search(r"(\d+)", ram_str)
     if plain_match:
         return int(plain_match.group(1))
-
+
     return None

-# Simplified LLM database
 LLM_DATABASE = {
-    "ultra_low": {  # ≤2GB
+    "ultra_low": {
         "general": [
             {"name": "TinyLlama-1.1B-Chat", "size": "637MB", "description": "Compact chat model"},
             {"name": "all-MiniLM-L6-v2", "size": "91MB", "description": "Sentence embeddings"}
@@ -77,7 +72,7 @@ LLM_DATABASE = {
             {"name": "CodeT5-small", "size": "242MB", "description": "Code generation"}
         ]
     },
-    "low": {  # 3-4GB
+    "low": {
         "general": [
             {"name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient model"},
             {"name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact model"}
@@ -86,7 +81,7 @@ LLM_DATABASE = {
             {"name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce code model"}
         ]
     },
-    "moderate": {  # 5-8GB
+    "moderate": {
         "general": [
             {"name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model"},
             {"name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruct"}
@@ -95,7 +90,7 @@ LLM_DATABASE = {
             {"name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned CodeLlama"}
         ]
     },
-    "good": {  # 9-16GB
+    "good": {
         "general": [
             {"name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant"},
             {"name": "OpenChat-3.5", "size": "7.1GB", "description": "High-quality chat model"}
@@ -104,7 +99,7 @@ LLM_DATABASE = {
             {"name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Larger code model"}
         ]
     },
-    "high": {  # 17-32GB
+    "high": {
         "general": [
             {"name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts"},
             {"name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's large model"}
@@ -113,7 +108,7 @@ LLM_DATABASE = {
             {"name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large code specialist"}
         ]
     },
-    "ultra_high": {  # >32GB
+    "ultra_high": {
         "general": [
             {"name": "Llama-2-70B", "size": "130GB", "description": "Full precision"},
             {"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model"}
@@ -122,12 +117,9 @@ LLM_DATABASE = {
 }

 def recommend_llm(ram_str) -> Tuple[str, str, Dict[str, List[Dict]]]:
-    """Returns (recommendation, performance_tier, detailed_models)"""
     ram = extract_numeric_ram(ram_str)
-
     if ram is None:
         return "⚪ Check exact specs", "Unknown", {}
-
     if ram <= 2:
         return "🔸 Ultra-lightweight models", "Ultra Low", LLM_DATABASE["ultra_low"]
     elif ram <= 4:
@@ -142,10 +134,8 @@ def recommend_llm(ram_str) -> Tuple[str, str, Dict[str, List[Dict]]]:
         return "🔵 Top-tier models", "Ultra High", LLM_DATABASE["ultra_high"]

 def get_os_info(os_name) -> Tuple[str, str]:
-    """Returns (icon, clean_name)"""
     if pd.isna(os_name):
-        return "💻", "Not specified"
-
+        return "🖼", "Not specified"
     os = str(os_name).lower()
     if "windows" in os:
         return "🪟", os_name
@@ -158,17 +148,14 @@ def get_os_info(os_name) -> Tuple[str, str]:
     elif "ios" in os:
         return "📱", os_name
     else:
-        return "💻", os_name
+        return "🖼", os_name

 def create_performance_chart(df):
-    """Create RAM distribution chart"""
     laptop_rams = df["Laptop RAM"].apply(extract_numeric_ram).dropna()
     mobile_rams = df["Mobile RAM"].apply(extract_numeric_ram).dropna()
-
     fig = go.Figure()
     fig.add_trace(go.Histogram(x=laptop_rams, name="Laptop RAM", opacity=0.7))
     fig.add_trace(go.Histogram(x=mobile_rams, name="Mobile RAM", opacity=0.7))
-
     fig.update_layout(
         title="RAM Distribution",
         xaxis_title="RAM (GB)",
@@ -179,121 +166,96 @@ def create_performance_chart(df):
     return fig

 def display_models(models_dict: Dict[str, List[Dict]]):
-    """Display models by category"""
     if not models_dict:
         return
-
     for category, model_list in models_dict.items():
         if model_list:
             st.markdown(f"**{category.title()} Models:**")
-            for model in model_list[:5]:  # Limit to 5 per category
+            for model in model_list[:5]:
                 st.write(f"• {model['name']} ({model['size']}) - {model['description']}")

-# Main App
 st.title("🧠 LLM Compatibility Advisor")
 st.markdown("Get personalized AI model recommendations with download sizes!")

-# Load data
 df, error = load_data()
-
 if error:
     st.error(error)
     st.stop()
-
 if df is None or df.empty:
     st.error("No data found.")
     st.stop()

-# Sidebar
 with st.sidebar:
     st.header("📊 Quick Stats")
     st.metric("Total Students", len(df))
-
     avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean()
     avg_mobile_ram = df["Mobile RAM"].apply(extract_numeric_ram).mean()
-
     if not pd.isna(avg_laptop_ram):
         st.metric("Avg Laptop RAM", f"{avg_laptop_ram:.1f} GB")
     if not pd.isna(avg_mobile_ram):
         st.metric("Avg Mobile RAM", f"{avg_mobile_ram:.1f} GB")

-# Individual Analysis
 st.subheader("👤 Individual Student Analysis")
-
-# Fixed selectbox - use index-based selection
 student_names = list(df["Full Name"].unique())
 student_options = ["Select a student..."] + student_names

-selected_index = st.selectbox(
+selected_name = st.selectbox(
     "Choose a student:",
-    options=range(len(student_options)),
-    format_func=lambda x: student_options[x]
+    options=student_options,
 )

-if selected_index > 0:  # If not the placeholder option
-    selected_user = student_names[selected_index - 1]
+if selected_name != "Select a student...":
+    selected_user = selected_name
     user_data = df[df["Full Name"] == selected_user].iloc[0]
-
+
     col1, col2 = st.columns(2)
-
+
     with col1:
         st.markdown("### 💻 Laptop")
         laptop_os_icon, laptop_os_name = get_os_info(user_data.get('Laptop Operating System'))
         laptop_ram = user_data.get('Laptop RAM', 'Not specified')
-        laptop_rec, laptop_tier, laptop_models = recommend_llm(laptop_ram)
-
+        laptop_rec, _, laptop_models = recommend_llm(laptop_ram)
         st.markdown(f"**OS:** {laptop_os_icon} {laptop_os_name}")
         st.markdown(f"**RAM:** {laptop_ram}")
         st.success(f"**Recommendation:** {laptop_rec}")
-
         display_models(laptop_models)
-
+
     with col2:
         st.markdown("### 📱 Mobile")
         mobile_os_icon, mobile_os_name = get_os_info(user_data.get('Mobile Operating System'))
         mobile_ram = user_data.get('Mobile RAM', 'Not specified')
-        mobile_rec, mobile_tier, mobile_models = recommend_llm(mobile_ram)
-
+        mobile_rec, _, mobile_models = recommend_llm(mobile_ram)
         st.markdown(f"**OS:** {mobile_os_icon} {mobile_os_name}")
         st.markdown(f"**RAM:** {mobile_ram}")
         st.success(f"**Recommendation:** {mobile_rec}")
-
         display_models(mobile_models)

-# Batch Analysis
 st.markdown("---")
 st.header("📊 Batch Analysis")
-
-# Create summary table
 df_display = df[["Full Name", "Laptop RAM", "Mobile RAM"]].copy()
 df_display["Laptop Recommendation"] = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[0])
 df_display["Mobile Recommendation"] = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[0])
-
 st.dataframe(df_display, use_container_width=True)

-# Performance chart
 if len(df) > 1:
     st.subheader("📈 RAM Distribution")
     fig = create_performance_chart(df)
     st.plotly_chart(fig, use_container_width=True)

-# Model Explorer
 st.markdown("---")
 st.header("🔍 Model Explorer")
-
 selected_ram_range = st.selectbox(
     "Select RAM range:",
-    ["≤2GB (Ultra Low)", "3-4GB (Low)", "5-8GB (Moderate)",
+    ["\u22642GB (Ultra Low)", "3-4GB (Low)", "5-8GB (Moderate)",
      "9-16GB (Good)", "17-32GB (High)", ">32GB (Ultra High)"]
 )

-# Map selection to database
 ram_mapping = {
     "≤2GB (Ultra Low)": "ultra_low",
     "3-4GB (Low)": "low",
     "5-8GB (Moderate)": "moderate",
     "9-16GB (Good)": "good",
-    "17-32GB (High)": " high ",
+    "17-32GB (High)": "high",
     ">32GB (Ultra High)": "ultra_high"
 }
@@ -302,25 +264,24 @@ if selected_key in LLM_DATABASE:
     st.subheader(f"Models for {selected_ram_range}")
     display_models(LLM_DATABASE[selected_key])

-# Quick reference
 with st.expander("📘 Quick Reference"):
     st.markdown("""
     ## Popular Models by Category
-
+
     **General Purpose:**
     - Llama-2 Series (7B, 13B, 70B)
     - Mistral Series
     - Gemma (2B, 7B)
-
+
     **Code Specialists:**
     - CodeLlama
     - CodeGen
-
+
     **Where to Download:**
     - 🤗 Hugging Face Hub
-    - 🦙 Ollama
+    - 🧙 Ollama
     - 📦 LM Studio
     """)

 st.markdown("---")
-st.markdown("*Built for BITS Pilani Interns*")
+st.markdown("*Built for BITS Pilani Interns*")
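The behavioral fix in this commit is the student picker: the old widget passed `range(len(student_options))` as options plus a `format_func`, then indexed back into `student_names`, while the new widget passes the name strings directly and compares the return value against the placeholder. A minimal standalone sketch of the new flow, with an illustrative two-row DataFrame standing in for the spreadsheets that `load_data()` reads:

```python
import pandas as pd
import streamlit as st

# Illustrative stand-in for the Excel data; the real app builds this in load_data().
df = pd.DataFrame({
    "Full Name": ["Student A", "Student B"],
    "Laptop RAM": ["8GB", "16 GB"],
    "Mobile RAM": ["4GB", "8GB"],
})

student_options = ["Select a student..."] + list(df["Full Name"].unique())

# The widget returns the selected string itself, so no index arithmetic is needed.
selected_name = st.selectbox("Choose a student:", options=student_options)

if selected_name != "Select a student...":
    user_data = df[df["Full Name"] == selected_name].iloc[0]
    st.write(f"Laptop RAM: {user_data['Laptop RAM']}")
```

Matching on the placeholder string keeps the widget's return value and the DataFrame lookup key identical, which removes the off-by-one risk of the old index arithmetic.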
 
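The other substantive fix is the `ram_mapping` entry for the 17-32GB range: the old value `" high "` (with padding) never matched the `"high"` key of `LLM_DATABASE`, so the `if selected_key in LLM_DATABASE:` guard silently rendered nothing for that range. A quick check of the failure mode, using only the tier keys visible in the diff:

```python
# Tier keys as defined in LLM_DATABASE in the diff above.
llm_database_keys = {"ultra_low", "low", "moderate", "good", "high", "ultra_high"}

assert " high " not in llm_database_keys  # old mapping value: lookup silently failed
assert "high" in llm_database_keys        # fixed value: 17-32GB models now display
```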
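For anyone tracing the RAM logic this UI depends on: `extract_numeric_ram` (unchanged here apart from comment removal) normalizes free-form survey answers to integer gigabytes, trying a GB pattern first, then MB (floored to at least 1 GB), then a bare number. A self-contained round-trip of the function as it appears in the diff; the expected values are my reading of the regexes, not test output from the repo:

```python
import re
from typing import Optional

import pandas as pd

def extract_numeric_ram(ram) -> Optional[int]:
    if pd.isna(ram):
        return None
    ram_str = str(ram).lower().replace(" ", "")
    # "8GB", "8 gb", "8g" -> 8
    gb_match = re.search(r"(\d+(?:\.\d+)?)(?:gb|g)", ram_str)
    if gb_match:
        return int(float(gb_match.group(1)))
    # "512MB" -> 512/1024 floored, but never below 1
    mb_match = re.search(r"(\d+)(?:mb|m)", ram_str)
    if mb_match:
        return max(1, int(int(mb_match.group(1)) / 1024))
    # bare "16" -> 16
    plain_match = re.search(r"(\d+)", ram_str)
    if plain_match:
        return int(plain_match.group(1))
    return None

assert extract_numeric_ram("8 GB") == 8
assert extract_numeric_ram("512MB") == 1
assert extract_numeric_ram("16") == 16
assert extract_numeric_ram(float("nan")) is None
```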