import os
import re

import streamlit as st
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Set page config
st.set_page_config(
    page_title="Deteksi Alergen Resep",
    page_icon="🍽️",
    layout="wide"
)
# App title and description
st.title("🍽️ Deteksi Alergen Resep Makanan")
st.markdown("""
Aplikasi ini dapat mendeteksi potensi alergen dalam resep makanan Indonesia.
Masukkan daftar bahan-bahan resep Anda, dan sistem akan mengidentifikasi alergen yang mungkin terkandung.
""")
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Base IndoBERT checkpoint. The tokenizer and the model must come from the
# same checkpoint so that token ids line up with the embedding table.
MODEL_NAME = 'indobenchmark/indobert-base-p2'

# Define target columns (allergens)
target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
allergen_descriptions = {
    'susu': 'Produk susu (milk products)',
    'kacang': 'Kacang-kacangan (nuts)',
    'telur': 'Telur (eggs)',
    'makanan_laut': 'Makanan laut (seafood)',
    'gandum': 'Gandum/gluten (wheat/gluten)'
}
# Clean text function
def clean_text(text):
    # Convert dashes to spaces for better tokenization
    text = text.replace('--', ' ')
    # Basic cleaning: drop URLs, newlines, and non-alphanumeric characters
    text = re.sub(r"http\S+", "", text)
    text = text.replace('\n', ' ')
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()
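
# Illustrative example (hypothetical input, not executed by the app):
#   clean_text("2 butir Telur ayam\n1 sdm Minyak, untuk menumis")
#   -> "2 butir telur ayam 1 sdm minyak untuk menumis"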
# Define model for multilabel classification
class MultilabelBertClassifier(nn.Module):
    def __init__(self, model_name, num_labels):
        super(MultilabelBertClassifier, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # Replace the classification head with our own for multilabel
        self.bert.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits
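
# Shape sketch (B = batch size, T = sequence length):
#   input_ids, attention_mask: LongTensor of shape [B, T]
#   returned logits:           FloatTensor of shape [B, num_labels],
#                              one raw (pre-sigmoid) score per allergen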
@st.cache_resource
def load_model_and_tokenizer():
    try:
        # Initialize tokenizer and model from the same base checkpoint
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = MultilabelBertClassifier(MODEL_NAME, len(target_columns))

        # Load the fine-tuned weights from local storage if present,
        # otherwise ask the user to upload them via the sidebar
        model_path = "alergen_model.pt"
        if os.path.exists(model_path):
            st.info("Loading model from local storage...")
            checkpoint = torch.load(model_path, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            st.warning("Model file not found. Please upload your model file.")
            return None, tokenizer

        model.to(device)
        model.eval()
        return model, tokenizer
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None
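
# The checkpoint file is expected to contain a dict with a 'model_state_dict'
# key, i.e. to have been saved during training along the lines of (assumed):
#   torch.save({'model_state_dict': model.state_dict()}, 'alergen_model.pt')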
# Function to predict allergens in new recipes
def predict_allergens(model, tokenizer, ingredients_text, max_length=128):
    if not model or not tokenizer:
        return None, None

    # Clean and tokenize the ingredient list
    cleaned_text = clean_text(ingredients_text)
    encoding = tokenizer(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Sigmoid turns each logit into an independent probability;
        # a probability above 0.5 counts as "allergen present"
        predictions = torch.sigmoid(outputs)

    predictions_np = predictions.cpu().numpy()[0]
    binary_predictions = predictions_np > 0.5

    result = {}
    confidence = {}
    for i, target in enumerate(target_columns):
        result[target] = bool(binary_predictions[i])
        confidence[target] = float(predictions_np[i])
    return result, confidence
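
# Example of the returned pair for a recipe containing eggs and peanut sauce
# (illustrative values, not real model output):
#   result     = {'susu': False, 'kacang': True, 'telur': True,
#                 'makanan_laut': False, 'gandum': False}
#   confidence = {'susu': 0.08, 'kacang': 0.97, 'telur': 0.95,
#                 'makanan_laut': 0.12, 'gandum': 0.21}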
# Sidebar for model upload
with st.sidebar:
    st.header("Model Management")
    uploaded_model = st.file_uploader("Upload model file (alergen_model.pt)", type=["pt"])
    if uploaded_model is not None:
        with open("alergen_model.pt", "wb") as f:
            f.write(uploaded_model.getbuffer())
        st.success("Model uploaded successfully!")
        # Clear the cached model so the new weights are loaded on this rerun
        st.cache_resource.clear()

    st.markdown("---")
    st.markdown("### Tentang Aplikasi")
    st.markdown("""
    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi
    potensi alergen dalam resep makanan. Model dilatih untuk mendeteksi lima jenis alergen
    umum dalam makanan.
    """)
# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer()

# Example recipe, shared by the "Lihat Contoh Resep" expander and the example button
EXAMPLE_RECIPE = """1 bungkus Lontong homemade
2 butir Telur ayam
2 kotak kecil Tahu coklat
4 butir kecil Kentang
2 buah Tomat merah
1 buah Ketimun lalap
4 lembar Selada keriting
2 lembar Kol putih
2 porsi Saus kacang homemade
4 buah Kerupuk udang goreng
Secukupnya emping goreng
2 sdt Bawang goreng
Secukupnya Kecap manis"""

# Main content
st.header("Masukkan Bahan-bahan Resep")

# Pre-fill the ingredients box when the example button was pressed on the previous run
if st.session_state.pop("use_example", False):
    st.session_state["ingredients_input"] = EXAMPLE_RECIPE

# Text area for ingredients input
ingredients = st.text_area(
    "Daftar Bahan (satu per baris atau dengan format yang umum digunakan)",
    height=150,
    placeholder="Contoh:\n1 bungkus Lontong homemade\n2 butir Telur ayam\n2 kotak kecil Tahu coklat\n4 butir kecil Kentang\n...",
    key="ingredients_input"
)
# Predict button
if st.button("Deteksi Alergen", type="primary"):
    if not ingredients:
        st.warning("Silakan masukkan daftar bahan terlebih dahulu.")
    elif not model:
        st.error("Model belum tersedia. Silakan upload model terlebih dahulu.")
    else:
        with st.spinner("Menganalisis resep..."):
            results, confidence = predict_allergens(model, tokenizer, ingredients)

        if results:
            st.header("Hasil Deteksi Alergen")

            # Display detected allergens
            detected_allergens = [allergen for allergen, present in results.items() if present]
            if detected_allergens:
                st.markdown("### ⚠️ Alergen Terdeteksi:")

                # Lay the allergen cards out in up to three columns
                cols = st.columns(min(len(detected_allergens), 3))
                for i, allergen in enumerate(detected_allergens):
                    with cols[i % len(cols)]:
                        st.markdown(f"""
                        <div style="padding: 10px; border-radius: 5px; background-color: #ffeeee; margin-bottom: 10px;">
                            <h4 style="color: #cc0000;">{allergen_descriptions[allergen]}</h4>
                            <p>Tingkat kepercayaan: {confidence[allergen]*100:.1f}%</p>
                        </div>
                        """, unsafe_allow_html=True)
            else:
                st.success("✅ Tidak ada alergen yang terdeteksi dalam resep ini.")

            # Display detailed analysis
            with st.expander("Lihat Analisis Detail"):
                st.markdown("### Tingkat Kepercayaan Per Alergen")
                for allergen in target_columns:
                    conf_value = confidence[allergen]
                    st.markdown(f"**{allergen_descriptions[allergen]}:** {conf_value*100:.1f}%")
                    st.progress(conf_value)
        else:
            st.error("Terjadi kesalahan dalam prediksi. Silakan coba lagi.")
# Example recipe section
with st.expander("Lihat Contoh Resep"):
    st.markdown("**Gado-gado:**")
    st.text(EXAMPLE_RECIPE)
    if st.button("Gunakan Contoh Ini"):
        # Flag the example for the next run; a widget's state cannot be
        # changed after the widget has been rendered in the current run
        st.session_state.use_example = True
        st.rerun()
# Footer
st.markdown("---")
st.markdown("*Aplikasi ini hanya untuk tujuan informasi. Silakan konsultasikan dengan ahli gizi untuk konfirmasi alergen dalam makanan.*")