In [2]:
!pip install faiss-cpu sentence-transformers clip-anytorch torch pillow opencv-python PyMuPDF

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting clip-anytorch
  Downloading clip_anytorch-2.6.0-py3-none-any.whl.metadata (8.4 kB)
Collecting PyMuPDF
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting ftfy (from clip-anytorch)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata

# Part 1: Create the indexes for every type of input

In [13]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import clip
import torch
from PIL import Image

# Initialize FAISS index
dimension = 384  # embedding size for text
image_index = faiss.IndexFlatL2(512) # embedding size for images
text_index = faiss.IndexFlatL2(dimension)
text_metadata, image_metadata = [],[]

# Load models
text_encoder = SentenceTransformer("all-MiniLM-L12-v2")
clip_model, clip_preprocess = clip.load("ViT-B/32", device="cpu")

**Text embeddings**

In [14]:
texts = ["""Asparagus Soup.
Ingredients. 
2 TBSPs extra-virgin olive oil, more for drizzling. 
3 scallions, chopped. 
2 small Yukon gold potatoes, diced. 
3 garlic cloves, minced Heaping. 
1/2 TSP sea salt, more to taste. 
1/2 TSP freshly ground black pepper. 
3 cups water. 
2 cups chopped asparagus. 
1 cup frozen peas. 
1 heaping TSP Dijon mustard. 
3 TBSPs lemon juice. 
1/2 TSP lemon zest. 
1 packed cup basil.  
Method. 
Make the mint & pine nut topping by chopping together the 
pine nuts, mint, red pepper flakes, and salt. 
Set aside. 
Heat the oil in a medium pot over medium heat. 
Add the scallions and cook for 1 to 2 minutes, or until soft. 
Add the potatoes, garlic, salt, pepper, and water and simmer for 
12 minutes or until the potatoes are fork-tender. 
Add the asparagus to the pot of potatoes and continue sim- 
mering for 5 more minutes. 
Remove the pot from the heat and let cool for 5 minutes. 
Stir in the peas and then transfer the soup to a blender. 
Add the mustard, lemon juice, and lemon zest and blend until 
creamy. 
Add the basil and blend until combined.""", 
"""Traditional Irish Stew. 
Ingredients. 
2 TBSP Vegetable Oil. 
1 LB Lamb Cutlets (Bone Removed, Cut into Chunks). 
2 LBs Russet Potatoes (Peeled and Quartered). 
1 Yellow Onion (Roughly Chopped). 
1 Leek (Finely Sliced). 
2 Carrots (Roughly Chopped). 
2 TBSP All Purpose Flour. 
3 Cups Beef Stock. 
2 Cabbage Leaves (Thinly Sliced). 
1/2 TSP Salt. 
1/2 TSP Ground Black Pepper. 
Method. 
Heat oven to 350 degrees. 
In a large skillet, heat half of the oil until hot, add half of the 
lamb cubes and brown all over, about 3-4 minutes. 
Remove the browned lamb and place in a casserole dish. 
Cover the lamb in the casserole dish with half of eh potatoes, 
onion, leek and carrots. 
Add remaining oil to the skillet and brown the remaining lamb 
and add to the casserole dish on top of the vegetables, cover with the 
remaining vegetables. 
Add the flour to the still hot skillet and stir to soak up the lamb 
fat and juices. 
Cook on a low heat for another 2-4 minutes. 
Add the stock a little at a time to the flour mixture and mix until 
it is a smooth, lump free gravy. 
Pour this gravy over the lamb and vegetables. 
Add any remaining beef stock to the casserole dish, cover and cook in the preheated oven for 1 hour. 
After 1 hour, add the cabbage to the casserole dish, replace the id 
and cook for another 1 hour, check regularly to make sure the liq- 
uids have no evaporated, if the liquid is getting low, add a little 
water to the casserole dish to cover the vegetables and lamb and 
continue cooking. 
Remove the casserole dish from the oven, season with salt and 
pepper and serve the Irish stew hot in individual bowls."""]

for text in texts:
    embedding = text_encoder.encode(text)
    text_index.add(np.array([embedding]))
    text_metadata.append({"type": "text", "content": text})

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

**PDF Embeddings**

In [15]:
import fitz  # PyMuPDF

pdf_paths = ["/kaggle/input/recipes/pdf_recipes.pdf"]
for pdf_path in pdf_paths:
    doc = fitz.open(pdf_path)
    for page_num, page in enumerate(doc):
        text = page.get_text().strip()
        if text:
            embedding = text_encoder.encode(text)
            text_index.add(np.array([embedding]))
            text_metadata.append({"type": "pdf", "content": text, "page": page_num + 1, "source": pdf_path})

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

**Image embedding**

In [16]:
image_paths = ["/kaggle/input/breakfast/English_with_Hash_Browns.jpg", "/kaggle/input/fishchips/FandC.jpg"]
for path in image_paths:
    image = clip_preprocess(Image.open(path)).unsqueeze(0)
    with torch.no_grad():
        embedding = clip_model.encode_image(image).squeeze().numpy()
    image_index.add(np.array([embedding]))
    image_metadata.append({"type": "image", "path": path})

**Save indexes**

In [23]:
# Save locally
faiss.write_index(text_index, "text.index")
faiss.write_index(image_index, "image.index")

# Save metadata
import json
json.dump(text_metadata, open("text_metadata.json", "w"))
json.dump(image_metadata, open("image_metadata.json", "w"))

# Save in Ocean
faiss.write_index(text_index, "text_recipes_test_br_00.ocean")
faiss.write_index(image_index, "image_recipes_test_br_00.ocean")

# Part 2: Query + retrieval

In [31]:
import faiss
from sentence_transformers import SentenceTransformer
import clip
from PIL import Image
import torch

# Load FAISS indexes
text_index = faiss.read_index("text_recipes_test_br_00.ocean")
image_index = faiss.read_index("image_recipes_test_br_00.ocean")

# Initialize models for text and image processing
text_encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
clip_model, clip_preprocess = clip.load("ViT-B/32", device="cpu")

def search_text(query):
    # Generate text embedding
    query_vec = text_encoder.encode(query).astype("float32")
    
    # Search in the FAISS text index, retrieve k most common
    distances, indices = text_index.search(query_vec.reshape(1, -1), k=3)
    # Format results in a structured way
    results = [
        {"content": text_metadata[idx], "distance": float(dist), "relevance_score": 1.0 / (1.0 + dist)}
        for idx, dist in zip(indices[0], distances[0])
    ]
    return results

def search_image(image_file):
    # Generate image embedding
    image = clip_preprocess(Image.open(image_file)).unsqueeze(0)
    with torch.no_grad():
        query_vec = clip_model.encode_image(image).squeeze().numpy().astype("float32")
    
    # Search in the FAISS image index, retrieve k most common
    distances, indices = image_index.search(query_vec.reshape(1, -1), k=3)
    # Format results in a structured way
    results = [
        {"content": image_metadata[idx], "distance": float(dist), "relevance_score": 1.0 / (1.0 + dist)}
        for idx, dist in zip(indices[0], distances[0])
    ]
    return results

In [30]:
# Example usage text
text_query_results = search_text("Find interesting recipes")
print("Text search results:", text_query_results)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Text search results: [{'content': {'type': 'pdf', 'content': "117\nIndex\nA\nApple\n      Crisp Pizza, 97\n      Honey, 54\n      Pudding, 97\n      Taffy- Salad, 32\nApplesauce\n      Bread, 12\n      Cake, 69\n      Muffins, 10\nB\nBanana\n      Dressing, 33\n      Nut Bread, 12\n      & Peanut Salad, 34\nBarbecue\n      Beef Meatballs, 43\n      Beef Sandwiches, 50\n      Brisket, 51\n      Ribs, 52\n      Sauce, 51, 52\n      Spareribs, 52\nBars\nCaramel, 87\nCarrot, 86\nCherry Danish, 91\nCranberry-Date, 86\nGerman Chocolate, 88\nLemon, 90, 91\nLemon Squares, 90\nMississippi Mud, 89\nOatmeal Carmelitas, 87\nOatmeal Chocolate, 88\nOh Henry, 89\nPumpkin, 85, 86\nSouthwestern, 90\nBeans\nBlack- Soup, 21\nCalico, 26\n      Dilly, 111\n      with Hamburger, 47\n      Three- Casserole, 47\nBeef\n      Barbecue- Sandwiches,\n            50\nBeef, cont.\n      -burger Specials, 46\n      El Dorado, 46\n      Stuffed French Bread,\n            49\nBeverages\nCoffee, Scandinavian\n      Egg

In [32]:
# Example usage image
image_query_results = search_image("/kaggle/input/breakfast-search/break_search.jpg")
print("Image search results:", image_query_results)

Image search results: [{'content': {'type': 'image', 'path': '/kaggle/input/breakfast/English_with_Hash_Browns.jpg'}, 'distance': 37.849029541015625, 'relevance_score': 0.02574066873264441}, {'content': {'type': 'image', 'path': '/kaggle/input/fishchips/FandC.jpg'}, 'distance': 69.11804962158203, 'relevance_score': 0.014261663086706912}, {'content': {'type': 'image', 'path': '/kaggle/input/fishchips/FandC.jpg'}, 'distance': 3.4028234663852886e+38, 'relevance_score': 2.938736052218037e-39}]
