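# Repository-viewer residue (author, commit count, commit hash, viewer links,
# file size), kept as a comment so the module parses:
# rdsarjito | 2 commit | c0cfde6 | raw | history blame | 8.87 kB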
import streamlit as st
import torch
import torch.nn as nn
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
import numpy as np
# Page configuration: browser-tab title, icon and wide layout
st.set_page_config(page_title="Deteksi Alergen Resep", page_icon="🍽️", layout="wide")

# Heading and short introduction shown at the top of the page
st.title("🍽️ Deteksi Alergen Resep Makanan")
st.markdown("""
Aplikasi ini dapat mendeteksi potensi alergen dalam resep makanan Indonesia.
Masukkan daftar bahan-bahan resep Anda, dan sistem akan mengidentifikasi alergen yang mungkin terkandung.
""")

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Human-readable description for every allergen label
allergen_descriptions = {
    'susu': 'Produk susu (milk products)',
    'kacang': 'Kacang-kacangan (nuts)',
    'telur': 'Telur (eggs)',
    'makanan_laut': 'Makanan laut (seafood)',
    'gandum': 'Gandum/gluten (wheat/gluten)',
}

# Allergen labels. The order matters: predictions are matched to these
# labels by position.
target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
# Clean text function
@st.cache_data
def clean_text(text):
    """Normalise raw ingredient text before it is tokenized.

    Steps, in order: replace double dashes with a space, drop URLs, turn
    newlines into spaces, replace every character that is not an ASCII
    letter, digit or whitespace with a space, collapse runs of spaces,
    strip the ends and lowercase.

    Args:
        text: Raw ingredient text.

    Returns:
        The cleaned, lowercased string.
    """
    # Convert double dashes to spaces for better tokenization
    text = text.replace('--', ' ')
    # Remove URLs
    text = re.sub(r"http\S+", "", text)
    text = text.replace('\n', ' ')
    # Raw string: "\s" inside a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()
# Model definition for multilabel classification
class MultilabelBertClassifier(nn.Module):
    """Sequence-classification backbone with a linear head, one logit per label.

    The backbone is loaded with ``AutoModelForSequenceClassification`` and its
    ``classifier`` attribute is replaced by a new ``nn.Linear`` layer mapping
    the backbone's hidden size to ``num_labels``.
    """

    def __init__(self, model_name, num_labels):
        """Load the pretrained backbone and attach the linear head.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
            num_labels: Number of output logits.
        """
        super().__init__()
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
        )
        # Replace the classification head with our own for multilabel output
        hidden_size = backbone.config.hidden_size
        backbone.classifier = nn.Linear(hidden_size, num_labels)
        self.bert = backbone

    def forward(self, input_ids, attention_mask):
        """Return the backbone's raw logits; no activation is applied here."""
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
@st.cache_resource
def load_model_and_tokenizer():
    """Load the fine-tuned allergen classifier and its tokenizer.

    The fine-tuned weights are read from ``alergen_model.pt`` in the working
    directory. When that file is missing, no model is returned: a backbone
    without the fine-tuned weights would only produce meaningless
    predictions, so callers must treat ``None`` as "model not available".

    Returns:
        ``(model, tokenizer)`` on success, with the model moved to ``device``
        and switched to eval mode; ``(None, None)`` when the checkpoint file
        is missing or loading fails.
    """
    model_path = "alergen_model.pt"
    try:
        if not os.path.exists(model_path):
            st.warning("Model file not found. Please upload your model file.")
            return None, None

        # NOTE(review): the tokenizer comes from the "-p2" checkpoint while
        # the backbone below uses "-p1" -- confirm this matches the setup the
        # checkpoint was trained with.
        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
        model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))

        st.info("Loading model from local storage...")
        # SECURITY NOTE(review): torch.load unpickles the file, and this file
        # can come from the sidebar uploader. Unpickling untrusted data can
        # execute arbitrary code; consider weights_only=True or a safer
        # format if uploads are not fully trusted.
        checkpoint = torch.load(model_path, map_location=device)
        # Accept both a training checkpoint ({'model_state_dict': ...}) and a
        # bare state dict.
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint
        model.load_state_dict(state_dict)

        model.to(device)
        model.eval()
        return model, tokenizer
    except Exception as e:
        # Top-level boundary for the UI: report the problem instead of
        # crashing the app.
        st.error(f"Error loading model: {e}")
        return None, None
# Function to predict allergens in new recipes
def predict_allergens(model, tokenizer, ingredients_text, max_length=128, threshold=0.5):
    """Predict which allergens a recipe's ingredient text contains.

    Args:
        model: Classifier returning one logit per entry of ``target_columns``.
        tokenizer: Tokenizer used to encode the cleaned text.
        ingredients_text: Raw ingredient list as entered by the user.
        max_length: Sequence length the input is truncated/padded to.
        threshold: Probability above which an allergen counts as detected.

    Returns:
        ``(result, confidence)``: ``result`` maps each allergen label to a
        bool, ``confidence`` maps it to the sigmoid probability as a float.
        ``(None, None)`` when the model or tokenizer is missing, so callers
        can always unpack the return value.
    """
    if model is None or tokenizer is None:
        return None, None

    cleaned_text = clean_text(ingredients_text)

    encoding = tokenizer(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        # Independent sigmoid per label (multilabel); take the first row
        # because a single text is encoded.
        probabilities = torch.sigmoid(logits)[0].cpu().tolist()

    result = {}
    confidence = {}
    for index, target in enumerate(target_columns):
        score = float(probabilities[index])
        confidence[target] = score
        result[target] = score > threshold
    return result, confidence
# Sidebar for model upload
with st.sidebar:
    st.header("Model Management")
    uploaded_model = st.file_uploader("Upload model file (alergen_model.pt)", type=["pt"])
    if uploaded_model is not None:
        # The uploader keeps returning the same file on every rerun. Save it
        # and clear the cached model only once per distinct upload; otherwise
        # every interaction would rewrite the file and force a model reload.
        upload_signature = (
            getattr(uploaded_model, "file_id", None),
            uploaded_model.name,
            uploaded_model.size,
        )
        if st.session_state.get("_saved_model_upload") != upload_signature:
            with open("alergen_model.pt", "wb") as f:
                f.write(uploaded_model.getbuffer())
            # Drop the cached model so the next load picks up the new file
            st.cache_resource.clear()
            st.session_state["_saved_model_upload"] = upload_signature
        st.success("Model uploaded successfully!")
    st.markdown("---")
    st.markdown("### Tentang Aplikasi")
    st.markdown(
        "\n"
        "Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi\n"
        "potensi alergen dalam resep makanan. Model dilatih untuk mendeteksi lima jenis alergen\n"
        "umum dalam makanan.\n"
    )
# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer()

# Session-state key of the ingredients text area; the example button writes
# to this key so the example lands in the field the detect button reads.
INGREDIENTS_KEY = "ingredients_input"

# Example recipe (gado-gado) shown in the expander and used by the button
EXAMPLE_RECIPE = """1 bungkus Lontong homemade
2 butir Telur ayam
2 kotak kecil Tahu coklat
4 butir kecil Kentang
2 buah Tomat merah
1 buah Ketimun lalap
4 lembar Selada keriting
2 lembar Kol putih
2 porsi Saus kacang homemade
4 buah Kerupuk udang goreng
Secukupnya emping goreng
2 sdt Bawang goreng
Secukupnya Kecap manis"""


def _use_example_recipe():
    """Button callback: copy the example recipe into the ingredients field."""
    st.session_state[INGREDIENTS_KEY] = EXAMPLE_RECIPE


def _render_detection_results(results, confidence):
    """Render the detected-allergen cards and the per-allergen confidence list.

    Args:
        results: Mapping of allergen label to a bool (detected or not).
        confidence: Mapping of allergen label to a probability in [0, 1].
    """
    st.header("Hasil Deteksi Alergen")

    detected_allergens = [allergen for allergen, present in results.items() if present]
    if detected_allergens:
        st.markdown("### ⚠️ Alergen Terdeteksi:")
        # At most three cards per row
        cols = st.columns(min(len(detected_allergens), 3))
        for i, allergen in enumerate(detected_allergens):
            card_html = (
                "\n"
                '<div style="padding: 10px; border-radius: 5px; '
                'background-color: #ffeeee; margin-bottom: 10px;">\n'
                f'<h4 style="color: #cc0000;">{allergen_descriptions[allergen]}</h4>\n'
                f"<p>Tingkat kepercayaan: {confidence[allergen]*100:.1f}%</p>\n"
                "</div>\n"
            )
            with cols[i % len(cols)]:
                st.markdown(card_html, unsafe_allow_html=True)
    else:
        st.success("✅ Tidak ada alergen yang terdeteksi dalam resep ini.")

    with st.expander("Lihat Analisis Detail"):
        st.markdown("### Tingkat Kepercayaan Per Alergen")
        for allergen in target_columns:
            conf_value = confidence[allergen]
            st.markdown(f"**{allergen_descriptions[allergen]}:** {conf_value*100:.1f}%")
            st.progress(conf_value)


# Main content
st.header("Masukkan Bahan-bahan Resep")

# Text area for ingredients input
ingredients = st.text_area(
    "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
    height=150,
    placeholder="Contoh:\n1 bungkus Lontong homemade\n2 butir Telur ayam\n2 kotak kecil Tahu coklat\n4 butir kecil Kentang\n...",
    key=INGREDIENTS_KEY,
)

# Predict button
if st.button("Deteksi Alergen", type="primary"):
    # Whitespace-only input counts as empty
    if not ingredients or not ingredients.strip():
        st.warning("Silakan masukkan daftar bahan terlebih dahulu.")
    elif model is None:
        st.error("Model belum tersedia. Silakan upload model terlebih dahulu.")
    else:
        with st.spinner("Menganalisis resep..."):
            prediction = predict_allergens(model, tokenizer, ingredients)
        # predict_allergens can signal failure without a usable pair; only
        # unpack when something was returned.
        results, confidence = prediction if prediction is not None else (None, None)
        if results:
            _render_detection_results(results, confidence)
        else:
            st.error("Terjadi kesalahan dalam prediksi. Silakan coba lagi.")

# Example recipe section
with st.expander("Lihat Contoh Resep"):
    st.markdown("\n**Gado-gado:**\n" + EXAMPLE_RECIPE + "\n")
    # The callback runs before the next rerun, so the text area above is
    # already filled with the example when it is drawn again.
    st.button("Gunakan Contoh Ini", on_click=_use_example_recipe)
# Footer: horizontal rule followed by an informational disclaimer
_FOOTER_DISCLAIMER = (
    "*Aplikasi ini hanya untuk tujuan informasi. "
    "Silakan konsultasikan dengan ahli gizi untuk konfirmasi alergen dalam makanan.*"
)
st.markdown("---")
st.markdown(_FOOTER_DISCLAIMER)