File size: 4,759 Bytes
0c1233f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from flask import Flask, request, jsonify
from flask_cors import CORS
import torch
import pandas as pd
import math
import os

# Point the Hugging Face caches at a writable directory. This is done before
# importing sentence_transformers, presumably so the library picks up the
# cache location when it is first imported.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache"
os.environ["HF_HOME"] = "/tmp/.cache"
os.makedirs("/tmp/.cache", exist_ok=True)
from sentence_transformers import SentenceTransformer, util  # noqa: E402

app = Flask(__name__)
CORS(app)

# Constants
# Maximum number of products on /search pages >= 2 and on every /search-green page.
PRODUCTS_PER_PAGE = 35
# Number of best-matching eco products pinned to the top of /search page 1.
TOP_ECO_COUNT = 5
# Fraction of PRODUCTS_PER_PAGE reserved for eco products on /search pages >= 2.
PAGE2_ECO_RATIO = 0.4

# Load model and data
print("🔄 Loading model and data...")
model = SentenceTransformer("Ujjwal-32/Product-Recommender")
df = pd.read_csv("products_clean_updated1.csv")
# Map the stored embeddings onto the model's device: a file saved on a GPU
# still loads on a CPU-only host, and query/product tensors never end up on
# different devices when their similarity is computed.
product_embeddings = torch.load("embeddings_updated1.pt", map_location=model.device)
print("✅ Model and embeddings loaded.")


def sanitize_product(product):
    """Return a copy of *product* with float NaN values replaced by ``None``.

    Only top-level values are inspected; every other value (including NaN
    nested inside containers) is carried over unchanged. The input mapping
    is not modified.
    """
    cleaned = {}
    for key, value in product.items():
        is_nan = isinstance(value, float) and math.isnan(value)
        cleaned[key] = None if is_nan else value
    return cleaned


@app.route("/")
def home():
    """Return a plain-text message confirming the server is up."""
    # The leading glyph was mis-encoded (UTF-8 bytes decoded with the wrong
    # codec); it is restored to the intended check-mark character.
    return "✅ GreenKart Flask Server is running!"


@app.route("/search", methods=["GET"])
def search_products():
    """Return one page of products ranked by semantic similarity to a query.

    Query parameters:
        query: Free-text search string (required).
        page: 1-based page number (optional, defaults to 1).

    A product counts as "eco" when ``isOrganic`` is true and
    ``sustainableScore`` is at least 75. Page 1 pins the ``TOP_ECO_COUNT``
    most similar eco products at the top, followed by a shuffled mix of up
    to 18 further eco products and up to 27 non-eco products. Later pages
    hold up to ``PRODUCTS_PER_PAGE`` products, a ``PAGE2_ECO_RATIO`` share
    of which are eco, shuffled with a per-page seed.

    Returns:
        A JSON array of product objects, each with ``images`` as a list and
        a ``similarity`` score; products without any image are omitted.
        A JSON error object with HTTP status 400 is returned when ``query``
        is missing or ``page`` is not a positive integer.
    """
    query = request.args.get("query", "").strip()
    if not query:
        return jsonify({"error": "Missing 'query' parameter"}), 400

    # Reject malformed page numbers up front: a non-numeric value would raise
    # an unhandled ValueError, and values below 1 would yield negative slice
    # offsets (and an invalid shuffle seed).
    try:
        page = int(request.args.get("page", 1))
    except ValueError:
        return jsonify({"error": "'page' must be a positive integer"}), 400
    if page < 1:
        return jsonify({"error": "'page' must be a positive integer"}), 400

    # Number of extra eco / non-eco products mixed into page 1 after the
    # pinned top eco products.
    page1_extra_eco = 18
    page1_non_eco = 27

    # Encode query and compute similarity
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]

    # assign() returns a new frame, so the shared module-level ``df`` is never
    # mutated and concurrent requests cannot overwrite each other's scores.
    sorted_df = df.assign(similarity=cosine_scores.cpu().numpy()).sort_values(
        by="similarity", ascending=False
    )

    # Split into eco and non-eco using one boolean mask. The mask must be
    # applied before any index reset; otherwise the complement would be taken
    # against positional labels instead of the actual eco rows.
    eco_mask = sorted_df["isOrganic"].eq(True) & (sorted_df["sustainableScore"] >= 75)
    eco_df = sorted_df[eco_mask].reset_index(drop=True)
    non_eco_df = sorted_df[~eco_mask].reset_index(drop=True)

    if page == 1:
        # Page 1: 5 top eco + 18 eco + 27 non-eco (shuffled)
        top_eco = eco_df.head(TOP_ECO_COUNT)
        rest_eco = eco_df.iloc[TOP_ECO_COUNT : TOP_ECO_COUNT + page1_extra_eco]
        rest_non_eco = non_eco_df.head(page1_non_eco)

        # Fixed seed keeps the page-1 ordering reproducible for a given query.
        mixed_rest = pd.concat([rest_eco, rest_non_eco]).sample(frac=1, random_state=42)
        final_df = pd.concat([top_eco, mixed_rest]).reset_index(drop=True)
    else:
        # Page 2 and onwards
        eco_count = int(PRODUCTS_PER_PAGE * PAGE2_ECO_RATIO)
        non_eco_count = PRODUCTS_PER_PAGE - eco_count

        # Skip everything already shown on page 1 and on earlier pages.
        eco_offset = TOP_ECO_COUNT + page1_extra_eco + (page - 2) * eco_count
        non_eco_offset = page1_non_eco + (page - 2) * non_eco_count

        eco_slice = eco_df.iloc[eco_offset : eco_offset + eco_count]
        non_eco_slice = non_eco_df.iloc[non_eco_offset : non_eco_offset + non_eco_count]

        # Seeding with the page number keeps each page's shuffle reproducible.
        final_df = (
            pd.concat([eco_slice, non_eco_slice])
            .sample(frac=1, random_state=page)
            .reset_index(drop=True)
        )

    # Convert the comma-separated images string to a list in all cases
    final_result = []
    for _, row in final_df.iterrows():
        images = []
        if isinstance(row["images"], str):
            images = [img.strip() for img in row["images"].split(",") if img.strip()]
        if not images:
            continue  # Skip if image list is empty
        product = row.to_dict()
        product["images"] = images
        # Replace NaN values with None before serialising.
        product = sanitize_product(product)
        final_result.append(product)

    return jsonify(final_result)


@app.route("/search-green", methods=["GET"])
def search_green_products():
    """Return one page of organic products ranked by similarity to a query.

    Query parameters:
        query: Free-text search string (required).
        page: 1-based page number (optional, defaults to 1).

    Only rows whose ``isOrganic`` column is true are considered; they are
    ordered by descending similarity and sliced into pages of
    ``PRODUCTS_PER_PAGE``.

    Returns:
        A JSON array of product objects, each with ``images`` as a list and
        a ``similarity`` score; products without any image are omitted, so a
        page may contain fewer than ``PRODUCTS_PER_PAGE`` entries.
        A JSON error object with HTTP status 400 is returned when ``query``
        is missing or ``page`` is not a positive integer.
    """
    query = request.args.get("query", "").strip()
    if not query:
        return jsonify({"error": "Missing 'query' parameter"}), 400

    # Reject malformed page numbers up front: a non-numeric value would raise
    # an unhandled ValueError, and values below 1 would produce a negative
    # slice start and silently return the wrong (usually empty) page.
    try:
        page = int(request.args.get("page", 1))
    except ValueError:
        return jsonify({"error": "'page' must be a positive integer"}), 400
    if page < 1:
        return jsonify({"error": "'page' must be a positive integer"}), 400

    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, product_embeddings)[0]

    # assign() returns a new frame, so the shared module-level ``df`` is never
    # mutated and concurrent requests cannot overwrite each other's scores.
    scored_df = df.assign(similarity=cosine_scores.cpu().numpy())

    # NOTE(review): unlike /search, no sustainableScore threshold is applied
    # here - confirm that this difference is intended.
    sorted_eco_df = (
        scored_df[scored_df["isOrganic"].eq(True)]
        .sort_values(by="similarity", ascending=False)
        .reset_index(drop=True)
    )

    start = (page - 1) * PRODUCTS_PER_PAGE
    end = start + PRODUCTS_PER_PAGE
    page_df = sorted_eco_df.iloc[start:end]

    # Convert the comma-separated images string to a list for each product.
    final_result = []
    for _, row in page_df.iterrows():
        images = []
        if isinstance(row["images"], str):
            images = [img.strip() for img in row["images"].split(",") if img.strip()]
        if not images:
            continue  # Skip products without valid images
        product = row.to_dict()
        product["images"] = images
        # Replace NaN values with None before serialising.
        product = sanitize_product(product)
        final_result.append(product)

    return jsonify(final_result)


if __name__ == "__main__":
    # Listen on the port given by the PORT environment variable (default 7860).
    port = int(os.environ.get("PORT", 7860))
    # The server binds to all interfaces, so the interactive debugger (which
    # allows arbitrary code execution from the browser) must not be on by
    # default. Set FLASK_DEBUG=1 to opt in during local development.
    debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=port, debug=debug)