import os
import re

import streamlit as st
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Set page config
st.set_page_config(
    page_title="Deteksi Alergen Resep",
    page_icon="🍽️",
    layout="wide"
)
# App title and description
st.title("🍽️ Deteksi Alergen Resep Makanan")
st.markdown("""
Aplikasi ini dapat mendeteksi potensi alergen dalam resep makanan Indonesia.
Masukkan daftar bahan-bahan resep Anda, dan sistem akan mengidentifikasi alergen yang mungkin terkandung.
""")
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Base IndoBERT checkpoint. The tokenizer and the model must come from the
# same checkpoint so that token ids line up with the embedding table.
MODEL_NAME = 'indobenchmark/indobert-base-p2'

# Define target columns (allergens)
target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
allergen_descriptions = {
    'susu': 'Produk susu (milk products)',
    'kacang': 'Kacang-kacangan (nuts)',
    'telur': 'Telur (eggs)',
    'makanan_laut': 'Makanan laut (seafood)',
    'gandum': 'Gandum/gluten (wheat/gluten)'
}
# Clean text function
def clean_text(text):
    # Convert dashes to spaces for better tokenization
    text = text.replace('--', ' ')
    # Basic cleaning: drop URLs, newlines, and non-alphanumeric characters
    text = re.sub(r"http\S+", "", text)
    text = text.replace('\n', ' ')
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()
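
# Illustrative example (hypothetical input, not executed by the app):
#   clean_text("2 butir Telur ayam\n1 sdm Minyak, untuk menumis")
#   -> "2 butir telur ayam 1 sdm minyak untuk menumis"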
# Define model for multilabel classification
class MultilabelBertClassifier(nn.Module):
    def __init__(self, model_name, num_labels):
        super(MultilabelBertClassifier, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # Replace the classification head with our own for multilabel
        self.bert.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits
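
# Shape sketch (B = batch size, T = sequence length):
#   input_ids, attention_mask: LongTensor of shape [B, T]
#   returned logits:           FloatTensor of shape [B, num_labels],
#                              one raw (pre-sigmoid) score per allergen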
@st.cache_resource
def load_model_and_tokenizer():
    try:
        # Initialize tokenizer and model from the same base checkpoint
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = MultilabelBertClassifier(MODEL_NAME, len(target_columns))

        # Load the fine-tuned weights from local storage if present,
        # otherwise ask the user to upload them via the sidebar
        model_path = "alergen_model.pt"
        if os.path.exists(model_path):
            st.info("Loading model from local storage...")
            checkpoint = torch.load(model_path, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            st.warning("Model file not found. Please upload your model file.")
            return None, tokenizer

        model.to(device)
        model.eval()
        return model, tokenizer
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None
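
# The checkpoint file is expected to contain a dict with a 'model_state_dict'
# key, i.e. to have been saved during training along the lines of (assumed):
#   torch.save({'model_state_dict': model.state_dict()}, 'alergen_model.pt')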
# Function to predict allergens in new recipes
def predict_allergens(model, tokenizer, ingredients_text, max_length=128):
    if not model or not tokenizer:
        return None, None

    # Clean and tokenize the ingredient list
    cleaned_text = clean_text(ingredients_text)
    encoding = tokenizer(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Sigmoid turns each logit into an independent probability;
        # a probability above 0.5 counts as "allergen present"
        predictions = torch.sigmoid(outputs)

    predictions_np = predictions.cpu().numpy()[0]
    binary_predictions = predictions_np > 0.5

    result = {}
    confidence = {}
    for i, target in enumerate(target_columns):
        result[target] = bool(binary_predictions[i])
        confidence[target] = float(predictions_np[i])
    return result, confidence
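
# Example of the returned pair for a recipe containing eggs and peanut sauce
# (illustrative values, not real model output):
#   result     = {'susu': False, 'kacang': True, 'telur': True,
#                 'makanan_laut': False, 'gandum': False}
#   confidence = {'susu': 0.08, 'kacang': 0.97, 'telur': 0.95,
#                 'makanan_laut': 0.12, 'gandum': 0.21}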
# Sidebar for model upload
with st.sidebar:
    st.header("Model Management")
    uploaded_model = st.file_uploader("Upload model file (alergen_model.pt)", type=["pt"])
    if uploaded_model is not None:
        with open("alergen_model.pt", "wb") as f:
            f.write(uploaded_model.getbuffer())
        st.success("Model uploaded successfully!")
        # Clear the cached model so the new weights are loaded on this rerun
        st.cache_resource.clear()

    st.markdown("---")
    st.markdown("### Tentang Aplikasi")
    st.markdown("""
    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi
    potensi alergen dalam resep makanan. Model dilatih untuk mendeteksi lima jenis alergen
    umum dalam makanan.
    """)
# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer()

# Example recipe, shared by the "Lihat Contoh Resep" expander and the example button
EXAMPLE_RECIPE = """1 bungkus Lontong homemade
2 butir Telur ayam
2 kotak kecil Tahu coklat
4 butir kecil Kentang
2 buah Tomat merah
1 buah Ketimun lalap
4 lembar Selada keriting
2 lembar Kol putih
2 porsi Saus kacang homemade
4 buah Kerupuk udang goreng
Secukupnya emping goreng
2 sdt Bawang goreng
Secukupnya Kecap manis"""

# Main content
st.header("Masukkan Bahan-bahan Resep")

# Pre-fill the ingredients box when the example button was pressed on the previous run
if st.session_state.pop("use_example", False):
    st.session_state["ingredients_input"] = EXAMPLE_RECIPE

# Text area for ingredients input
ingredients = st.text_area(
    "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
    height=150,
    placeholder="Contoh:\n1 bungkus Lontong homemade\n2 butir Telur ayam\n2 kotak kecil Tahu coklat\n4 butir kecil Kentang\n...",
    key="ingredients_input"
)
# Predict button
if st.button("Deteksi Alergen", type="primary"):
    if not ingredients:
        st.warning("Silakan masukkan daftar bahan terlebih dahulu.")
    elif not model:
        st.error("Model belum tersedia. Silakan upload model terlebih dahulu.")
    else:
        with st.spinner("Menganalisis resep..."):
            results, confidence = predict_allergens(model, tokenizer, ingredients)

        if results:
            st.header("Hasil Deteksi Alergen")

            # Display detected allergens
            detected_allergens = [allergen for allergen, present in results.items() if present]
            if detected_allergens:
                st.markdown("### ⚠️ Alergen Terdeteksi:")

                # Lay the allergen cards out in up to three columns
                cols = st.columns(min(len(detected_allergens), 3))
                for i, allergen in enumerate(detected_allergens):
                    with cols[i % len(cols)]:
                        st.markdown(f"""
                        <div style="padding: 10px; border-radius: 5px; background-color: #ffeeee; margin-bottom: 10px;">
                            <h4 style="color: #cc0000;">{allergen_descriptions[allergen]}</h4>
                            <p>Tingkat kepercayaan: {confidence[allergen]*100:.1f}%</p>
                        </div>
                        """, unsafe_allow_html=True)
            else:
                st.success("✅ Tidak ada alergen yang terdeteksi dalam resep ini.")

            # Display detailed analysis
            with st.expander("Lihat Analisis Detail"):
                st.markdown("### Tingkat Kepercayaan Per Alergen")
                for allergen in target_columns:
                    conf_value = confidence[allergen]
                    st.markdown(f"**{allergen_descriptions[allergen]}:** {conf_value*100:.1f}%")
                    st.progress(conf_value)
        else:
            st.error("Terjadi kesalahan dalam prediksi. Silakan coba lagi.")
# Example recipe section
with st.expander("Lihat Contoh Resep"):
    st.markdown("**Gado-gado:**")
    st.text(EXAMPLE_RECIPE)
    if st.button("Gunakan Contoh Ini"):
        # Flag the example for the next run; a widget's state cannot be
        # changed after the widget has been rendered in the current run
        st.session_state.use_example = True
        st.rerun()
# Footer
st.markdown("---")
st.markdown("*Aplikasi ini hanya untuk tujuan informasi. Silakan konsultasikan dengan ahli gizi untuk konfirmasi alergen dalam makanan.*")