# GreenKart product-recommendation server (Flask + SentenceTransformers).
from flask import Flask, request, jsonify
from flask_cors import CORS
import torch
import pandas as pd
import math
import os

# Redirect HuggingFace caches to a writable tmp dir before importing
# sentence_transformers (needed on hosts with a read-only home directory).
os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache"
os.environ["HF_HOME"] = "/tmp/.cache"
os.makedirs("/tmp/.cache", exist_ok=True)
from sentence_transformers import SentenceTransformer, util

app = Flask(__name__)
CORS(app)

# Constants
PRODUCTS_PER_PAGE = 35  # total products returned per page
TOP_ECO_COUNT = 5       # eco products pinned to the top of page 1
PAGE2_ECO_RATIO = 0.4   # eco share of each page from page 2 onward

# Load model and data once at startup.
# FIX: the original print strings contained mojibake emoji that split the
# string literals across two lines (a syntax error); restored as plain ASCII.
print("Loading model and data...")
model = SentenceTransformer("Ujjwal-32/Product-Recommender")
df = pd.read_csv("products_clean_updated1.csv")
product_embeddings = torch.load("embeddings_updated1.pt")
print("Model and embeddings loaded.")
def sanitize_product(product):
    """Return a copy of *product* with NaN float values replaced by None.

    jsonify cannot serialize NaN to valid JSON, so any float field that is
    NaN (typically a missing CSV cell) is mapped to None; every other value
    passes through unchanged.
    """
    cleaned = {}
    for key, value in product.items():
        if isinstance(value, float) and math.isnan(value):
            cleaned[key] = None
        else:
            cleaned[key] = value
    return cleaned
@app.route("/")
def home():
    """Health-check endpoint confirming the server is up."""
    # FIX: the original return string was split across two lines by a
    # garbled (mojibake) emoji, which is a syntax error; restored as ASCII.
    return "GreenKart Flask Server is running!"
@app.route("/search", methods=["GET"])
def search_products():
    """Blended semantic search over all products.

    Query params:
        query (str, required): free-text search query.
        page (int, default 1): 1-based page number.

    Page 1 returns the top 5 eco products, then 18 more eco and 27 non-eco
    products shuffled together. Later pages mix eco/non-eco at
    PAGE2_ECO_RATIO. Eco = isOrganic AND sustainableScore >= 75.

    Returns a JSON list of sanitized product dicts (images as a list),
    or a 400 error for a missing query / non-integer page.
    """
    query = request.args.get("query", "").strip()
    # FIX: int() on arbitrary input raised ValueError -> HTTP 500.
    try:
        page = max(1, int(request.args.get("page", 1)))
    except ValueError:
        return jsonify({"error": "'page' must be an integer"}), 400
    if not query:
        return jsonify({"error": "Missing 'query' parameter"}), 400

    # Encode query and score every product by cosine similarity.
    # NOTE(review): writing into the module-level df is not thread-safe under
    # concurrent requests — confirm single-threaded deployment or refactor.
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]
    df["similarity"] = cosine_scores.cpu().numpy()

    # Sort products by similarity (index labels are preserved by sort_values).
    sorted_df = df.sort_values(by="similarity", ascending=False)

    # Split into eco and non-eco using one boolean mask.
    # FIX: the original reset eco_df's index to 0..k-1 and then excluded
    # sorted_df rows via ~sorted_df.index.isin(eco_df.index), which compared
    # positional labels against original labels — it dropped the wrong rows
    # and left eco products duplicated in the non-eco pool.
    eco_mask = (sorted_df["isOrganic"] == True) & (sorted_df["sustainableScore"] >= 75)
    eco_df = sorted_df[eco_mask].reset_index(drop=True)
    non_eco_df = sorted_df[~eco_mask].reset_index(drop=True)

    if page == 1:
        # Page 1: 5 top eco + 18 eco + 27 non-eco (shuffled below the top 5).
        top_eco = eco_df.head(TOP_ECO_COUNT)
        rest_eco = eco_df.iloc[TOP_ECO_COUNT : TOP_ECO_COUNT + 18]
        rest_non_eco = non_eco_df.head(27)
        mixed_rest = pd.concat([rest_eco, rest_non_eco]).sample(frac=1, random_state=42)
        final_df = pd.concat([top_eco, mixed_rest]).reset_index(drop=True)
    else:
        # Page 2 and onwards: fixed eco/non-eco ratio, offset past page 1's
        # consumption of both pools; random_state=page keeps pages stable.
        eco_count = int(PRODUCTS_PER_PAGE * PAGE2_ECO_RATIO)
        non_eco_count = PRODUCTS_PER_PAGE - eco_count
        eco_offset = TOP_ECO_COUNT + 18 + (page - 2) * eco_count
        non_eco_offset = 27 + (page - 2) * non_eco_count
        eco_slice = eco_df.iloc[eco_offset : eco_offset + eco_count]
        non_eco_slice = non_eco_df.iloc[non_eco_offset : non_eco_offset + non_eco_count]
        final_df = (
            pd.concat([eco_slice, non_eco_slice])
            .sample(frac=1, random_state=page)
            .reset_index(drop=True)
        )

    # Convert the comma-separated images string to a list; skip products
    # with no usable image. (Original comment here was mojibake-split
    # across two lines, a syntax error.)
    final_result = []
    for _, row in final_df.iterrows():
        images = []
        if isinstance(row["images"], str):
            images = [img.strip() for img in row["images"].split(",") if img.strip()]
        if not images:
            continue  # Skip if image list is empty
        product = row.to_dict()
        product["images"] = images
        product = sanitize_product(product)
        final_result.append(product)

    return jsonify(final_result)
@app.route("/search-green", methods=["GET"])
def search_green_products():
    """Eco-only semantic search: organic products ranked by similarity.

    Query params:
        query (str, required): free-text search query.
        page (int, default 1): 1-based page number, PRODUCTS_PER_PAGE each.

    Returns a JSON list of sanitized product dicts (images as a list),
    or a 400 error for a missing query / non-integer page.
    """
    query = request.args.get("query", "").strip()
    # FIX: int() on arbitrary input raised ValueError -> HTTP 500
    # (consistent with the /search handler's validation).
    try:
        page = max(1, int(request.args.get("page", 1)))
    except ValueError:
        return jsonify({"error": "'page' must be an integer"}), 400
    if not query:
        return jsonify({"error": "Missing 'query' parameter"}), 400

    # Score every product against the query.
    # NOTE(review): mutating the module-level df is not thread-safe under
    # concurrent requests — confirm single-threaded deployment or refactor.
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]
    df["similarity"] = cosine_scores.cpu().numpy()

    # Only organic products, best match first. (Unlike /search, this route
    # deliberately does not require sustainableScore >= 75.)
    sorted_eco_df = (
        df[(df["isOrganic"] == True)]
        .sort_values(by="similarity", ascending=False)
        .reset_index(drop=True)
    )

    # Plain fixed-size pagination.
    start = (page - 1) * PRODUCTS_PER_PAGE
    end = start + PRODUCTS_PER_PAGE
    page_df = sorted_eco_df.iloc[start:end]

    # Convert the comma-separated images string to a list; skip products
    # with no usable image.
    final_result = []
    for _, row in page_df.iterrows():
        images = []
        if isinstance(row["images"], str):
            images = [img.strip() for img in row["images"].split(",") if img.strip()]
        if not images:
            continue  # Skip products without valid images
        product = row.to_dict()
        product["images"] = images
        product = sanitize_product(product)
        final_result.append(product)

    return jsonify(final_result)
if __name__ == "__main__":
    # Port is configurable via the PORT env var (7860 by default).
    # NOTE(review): debug=True enables the Werkzeug debugger, which is unsafe
    # when bound to 0.0.0.0 — confirm production runs behind a WSGI server.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port, debug=True)