File size: 8,446 Bytes
3616649 7391c07 3616649 02717d7 3616649 2420ad1 3616649 3980981 3616649 02717d7 bcc7b20 3055fa2 bdfb020 02717d7 3616649 49e5dfb 3616649 7f27ea8 3616649 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
import streamlit as st
import os
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import pickle
from langchain_huggingface import HuggingFaceEndpoint
from transformers import AutoTokenizer, AutoModel
import torch
# Load the tokenizer and the encoder model once, at import time.
# Both are read from the same path, so it is named once here.
_ENCODER_PATH = 'src/paraphrase-mpnet-base-v2'
tokenizer = AutoTokenizer.from_pretrained(_ENCODER_PATH)
model = AutoModel.from_pretrained(_ENCODER_PATH)
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dimension 1, weighted by the mask.

    Args:
        model_output: Indexable model result; element 0 is used as the
            token embeddings.
        attention_mask: Tensor that is unsqueezed on its last axis and
            expanded to the embeddings' size, so it has one dimension
            fewer than the embeddings (presumably 1 for real tokens and
            0 for padding - confirm against the tokenizer).

    Returns:
        Tensor holding the mask-weighted sum over dimension 1 divided by
        the mask total over dimension 1.
    """
    hidden_states = model_output[0]  # First element is token embeddings
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = torch.sum(hidden_states * mask, 1)
    # Clamp the divisor so an all-zero mask yields zeros, not a division by zero.
    divisor = torch.clamp(mask.sum(1), min=1e-9)
    return weighted_sum / divisor
def encode(sentences):
    """Embed sentences with the module-level tokenizer and model.

    Args:
        sentences: Input passed straight to the tokenizer (the caller in
            this file passes a list containing one string).

    Returns:
        NumPy array produced by mean-pooling the model output with the
        batch's attention mask.
    """
    batch = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch['attention_mask'])
    return pooled.cpu().numpy()
# Fragrance card function
def create_fragrance_card(name, rating, brand, perfumer_text, top_notes, middle_notes, base_notes, accords_text, explanation):
    """Build the HTML snippet for one fragrance recommendation card.

    Every argument is converted to text and HTML-escaped before it is
    placed in the markup. The card is meant to be rendered as raw HTML, so
    unescaped values (dataset fields or model-generated text) could
    otherwise inject tags or break the layout.

    Args:
        name: Fragrance name, shown in the card heading.
        rating: Rating shown next to the name in the heading.
        brand: Brand text.
        perfumer_text: Perfumer(s) text.
        top_notes: Top notes text.
        middle_notes: Heart (middle) notes text.
        base_notes: Base notes text.
        accords_text: Main accords text.
        explanation: Explanation text for the recommendation.

    Returns:
        str: The card markup, a single styled ``<div>``.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def _safe(value):
        # str() mirrors what the f-string would do for non-string values.
        return escape(str(value))

    (name, rating, brand, perfumer_text, top_notes,
     middle_notes, base_notes, accords_text, explanation) = map(
        _safe,
        (name, rating, brand, perfumer_text, top_notes,
         middle_notes, base_notes, accords_text, explanation),
    )

    # The markup lines stay at column 0 inside the literal so the emitted
    # text carries no leading whitespace.
    # NOTE(review): the glyphs in front of each label look like emoji that
    # were decoded with the wrong charset - confirm against the original
    # file before changing them.
    card_html = f"""
<div style="border: 1px solid #ddd; padding: 15px; margin: 10px; border-radius: 15px;
background: linear-gradient(to bottom right, #ffffff, #f2f6fc);
width: 400px; color: #222; box-shadow: 0 4px 8px rgba(0,0,0,0.1);">
<h3 style="color: #3a3a3a; text-align: center;">{name} β{rating}</h3>
<p><strong>π·οΈ Brand:</strong> {brand}</p>
<p><strong>π Perfumer(s):</strong> {perfumer_text}</p>
<p><strong>πΏ Top Notes:</strong> {top_notes}</p>
<p><strong>π Heart Notes:</strong> {middle_notes}</p>
<p><strong>π² Base Notes:</strong> {base_notes}</p>
<p><strong>πΌ Main Accords:</strong> {accords_text}</p>
<p><strong>π‘ AI Explanation:</strong> {explanation}</p>
</div>
"""
    return card_html
# Load the FAISS index and the pickled metadata, cached by Streamlit
@st.cache_resource
def load_resources():
    """Read the FAISS index and the fragrance metadata from ``src/``.

    Returns:
        tuple: ``(index, metadata)`` - the object returned by
        ``faiss.read_index`` and the object unpickled from the metadata
        file.
    """
    faiss_index = faiss.read_index('src/fragrance_faiss.index')
    with open('src/fragrance_metadata.pkl', 'rb') as metadata_file:
        fragrance_metadata = pickle.load(metadata_file)
    return faiss_index, fragrance_metadata
# Asks the module-level LLM for a brief explanation of why this fragrance matches the user's query
def get_ollama_explanation(query, description):
    """Return a short explanation of why a fragrance fits the query.

    Args:
        query: The user's free-text description of the fragrance they want.
        description: Text describing the recommended fragrance.

    Returns:
        The result of ``llm.invoke`` on the prompt, with surrounding
        whitespace stripped.
    """
    llm_prompt = f"""
A user is searching for a fragrance with this description: "{query}"
One recommendation is:
{description}
Explain in 1-2 sentences, in plain English, why this fragrance matches the user's query.
"""
    return llm.invoke(llm_prompt).strip()
# Load LLM
# The API token is read from the LLM_TOKEN environment variable; a missing
# variable raises KeyError here, at import time.
_llm_token = os.environ["LLM_TOKEN"]
llm = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    task="text-generation",
    huggingfacehub_api_token=_llm_token,
)
# Initialize app
st.set_page_config(page_title="Fragrance Recommendation System", layout="wide")

# Add title to top of app interface
st.title("Fragrance Recommendation System")

# Heading for the sidebar filter widgets created below
st.sidebar.header("Filters")

# Main area: free-text query, then two sliders placed side by side
query = st.text_input("Describe your ideal fragrance:")
col1, col2 = st.columns(2)
k = col1.slider("Number of recommendations:", 1, 10, 5)
min_rating = col2.slider("Minimum rating:", 1.0, 5.0, 3.5)

# Sidebar filters; brand text is title-cased and note text lower-cased on input
gender_filter = st.sidebar.selectbox("Gender:", ["All", "Male", "Female", "Unisex"])
brand_filter = st.sidebar.text_input("Brand (leave empty for all):", "").title()
note_filter = st.sidebar.text_input("Notes (comma-separated):", "").lower()

# Load resources
index, metadata = load_resources()

# Convert rating values to numeric; entries that cannot be parsed become NaN
if 'rating_value' in metadata.columns:
    metadata['rating_value'] = pd.to_numeric(metadata['rating_value'], errors='coerce')
# Press button and start recommendations.
# Flow: filter the metadata, run a similarity search restricted to the rows
# that survived the filters, then render one card per hit with an LLM
# explanation.
if st.button('Get Recommendations'):
    with st.spinner('Finding your fragrance recs...'):
        # A query made only of whitespace carries no information either.
        if not query.strip():
            st.warning("No query entered.")
        else:
            # Apply filters sequentially
            current_df = metadata.copy()

            # Gender filter
            if gender_filter != "All":
                current_df = current_df[current_df['gender'].str.lower() == gender_filter.lower()]

            # Brand filter: literal, case-insensitive substring match.
            # regex=False keeps characters such as "(" or "." in the user's
            # text from being interpreted as a pattern.
            if brand_filter:
                brand_mask = current_df['brand'].str.contains(
                    brand_filter, case=False, na=False, regex=False
                )
                current_df = current_df[brand_mask]

            # Rating filter (NaN ratings compare as False and are dropped)
            if 'rating_value' in current_df.columns:
                current_df = current_df[current_df['rating_value'].ge(min_rating)]

            # Note filter: keep rows where any requested note appears in any
            # of the top/middle/base fields. Empty tokens (e.g. from a
            # trailing comma) are discarded, because "" is a substring of
            # every field and would make the filter match everything.
            notes = [n.strip().lower() for n in note_filter.split(",") if n.strip()]
            if notes:
                def note_check(row):
                    note_fields = [
                        str(row[col]).lower() if pd.notna(row[col]) else ""
                        for col in ('top', 'middle', 'base')
                    ]
                    return any(note in field for note in notes for field in note_fields)

                current_df = current_df[current_df.apply(note_check, axis=1)]

            valid_indices = current_df.index.tolist()

            # Check if any fragrances remain
            if not valid_indices:
                st.warning("No fragrances match all your filters. Try relaxing some criteria.")
                st.stop()

            # Grab the vectors for fragrances still present after the filters.
            # NOTE(review): assumes the DataFrame index labels are the row
            # ids used in the FAISS index - confirm against the build script.
            filtered_vectors = np.vstack([index.reconstruct(int(idx)) for idx in valid_indices])
            temp_index = faiss.IndexFlatIP(filtered_vectors.shape[1])
            temp_index.add(filtered_vectors)

            # Encode the query and L2-normalize it; with an inner-product
            # index this gives cosine similarity provided the stored vectors
            # are normalized too (not visible here - confirm).
            query_vector = encode([query])
            faiss.normalize_L2(query_vector)

            # Search returns similarity scores and positions within
            # temp_index, i.e. positions into valid_indices.
            sim_scores, positions = temp_index.search(query_vector, min(k, len(valid_indices)))

            # Map each hit back to its metadata label, paired with its score
            results = [
                (valid_indices[pos], score)
                for pos, score in zip(positions[0], sim_scores[0])
            ]

            # Display results, cycling through three columns
            st.subheader(f"Recommended Fragrances ({len(results)} results)")
            cols = st.columns(3)
            for idx, (result_idx, _similarity) in enumerate(results):
                rec = metadata.loc[result_idx]

                # Extract data with fallbacks
                name = rec.get('perfume', 'Unknown')
                brand = rec.get('brand', 'Unknown')
                perfumer_text = rec.get('perfumer', 'Unknown')
                top_notes = rec.get('top', 'Unknown')
                middle_notes = rec.get('middle', 'Unknown')
                base_notes = rec.get('base', 'Unknown')
                accords_text = rec.get('accord', 'Unknown')
                rating = rec.get('rating_value', '?')

                # Create natural language fragrance description
                description = (
                    f"The fragrance is called {name}. It is by {brand}. "
                    f"The perfumer is {perfumer_text}. The top notes are {top_notes}, "
                    f"the heart notes are {middle_notes}, and the base notes are {base_notes}. "
                    f"The main accords are {accords_text}."
                )
                explanation = get_ollama_explanation(query, description)

                # Build the card (rating included) and render it as raw HTML
                card = create_fragrance_card(
                    name,
                    rating,
                    brand,
                    perfumer_text,
                    top_notes,
                    middle_notes,
                    base_notes,
                    accords_text,
                    explanation,
                )
                cols[idx % 3].markdown(card, unsafe_allow_html=True)
|