import streamlit as st import torch from transformers import BertForSequenceClassification, BertTokenizerFast from transformers import AutoModelForSequenceClassification, AutoTokenizer import time import pandas as pd import base64 from PIL import Image, ImageDraw, ImageFont import io import streamlit.components.v1 as components # Set page configuration st.set_page_config( page_title="SMS Spam Guard", page_icon="🛡️", layout="wide", initial_sidebar_state="expanded" ) # New function to create a tech-themed Spam Guard logo def create_spam_guard_logo(): width, height = 200, 200 img = Image.new('RGBA', (width, height), (0,0,0,0)) # Transparent background draw = ImageDraw.Draw(img) # Flat Design Colors (slightly adjusted for modern flat look) primary_blue = (20, 120, 220) # A strong, modern blue accent_green = (0, 200, 150) # A vibrant, techy teal/green light_accent_blue = (100, 180, 240) # Lighter blue for highlights or secondary elements white_color = (255, 255, 255) dark_gray_text = (50, 50, 50) # For subtle text if needed # Background: A subtle gradient or a clean shape # Option 1: Clean circle as base # draw.ellipse([(10, 10), (width - 10, height - 10)], fill=primary_blue) # Option 2: Modern, slightly rounded rectangle or abstract shape # For a more abstract, less shield-like, but still contained feel: # Let's try a stylized hexagon or a shape made of intersecting elements. # Design: Abstract interlocking shapes suggesting SG or a data block / shield # Main body - a dynamic shape path = [ (width * 0.15, height * 0.2), (width * 0.85, height * 0.2), (width * 0.75, height * 0.8), (width * 0.25, height * 0.8) ] draw.polygon(path, fill=primary_blue) # Accent element (e.g., a stylized 'S' or a connecting line) draw.line([ (width * 0.3, height * 0.35), (width * 0.7, height * 0.35), (width * 0.7, height * 0.5), (width * 0.3, height * 0.5), (width * 0.3, height * 0.65), (width * 0.7, height * 0.65) ], fill=accent_green, width=18, joint="miter") # Adding a subtle highlight or secondary shape for depth (still flat) draw.polygon([ (width * 0.18, height * 0.22), (width * 0.82, height * 0.22), (width * 0.72, height * 0.78), (width * 0.28, height * 0.78) ], outline=light_accent_blue, width=4) # Text "SG" - Clean, modern, sans-serif font try: # Attempt to load a more modern, geometric font if available # For example, 'Montserrat-Bold.ttf' or 'Roboto-Medium.ttf' # If not, Arial Bold is a safe fallback. font = ImageFont.truetype("arialbd.ttf", 70) # Arial Bold as a fallback except IOError: font = ImageFont.load_default() # Fallback text = "SG" text_bbox = draw.textbbox((0,0), text, font=font) text_width = text_bbox[2] - text_bbox[0] text_height = text_bbox[3] - text_bbox[1] text_x = (width - text_width) / 2 # text_y = (height - text_height) / 2 # Slightly adjust y if the accent green takes up visual center text_y = (height - text_height) / 2 + 5 # Adjusted to better center with the green shape # Make text white and prominent draw.text((text_x, text_y), text, font=font, fill=white_color) buffered = io.BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() return f"data:image/png;base64,{img_str}" # Custom CSS for styling with China Mobile colors st.markdown(""" """, unsafe_allow_html=True) @st.cache_resource def load_language_model(): """Load the language detection model""" model_name = "papluca/xlm-roberta-base-language-detection" tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSequenceClassification.from_pretrained(model_name) return tokenizer, model @st.cache_resource def load_spam_model(): """Load the fine-tuned BERT spam detection model""" model_path = "chjivan/final" tokenizer = BertTokenizerFast.from_pretrained(model_path) model = BertForSequenceClassification.from_pretrained(model_path) return tokenizer, model def detect_language(text, tokenizer, model): """Detect the language of the input text""" inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True) with torch.no_grad(): outputs = model(**inputs) logits = outputs.logits probabilities = torch.softmax(logits, dim=1)[0] predicted_class_id = torch.argmax(probabilities).item() predicted_language = model.config.id2label[predicted_class_id] confidence = probabilities[predicted_class_id].item() top_3_indices = torch.topk(probabilities, 3).indices.tolist() top_3_probs = torch.topk(probabilities, 3).values.tolist() top_3_langs = [(model.config.id2label[idx], prob) for idx, prob in zip(top_3_indices, top_3_probs)] return predicted_language, confidence, top_3_langs def classify_spam(text, tokenizer, model): """Classify the input text as spam or ham""" inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128) with torch.no_grad(): outputs = model(**inputs) logits = outputs.logits probabilities = torch.softmax(logits, dim=1)[0] predicted_class_id = torch.argmax(probabilities).item() confidence = probabilities[predicted_class_id].item() is_spam = predicted_class_id == 1 return is_spam, confidence # Get the new Spam Guard logo logo_data = create_spam_guard_logo() # Add custom CSS animations (ensure this is defined before use) st.markdown(""" """, unsafe_allow_html=True) # Load both models with st.spinner("Loading models... This may take a moment."): lang_tokenizer, lang_model = load_language_model() spam_tokenizer, spam_model = load_spam_model() # App Header with new logo st.markdown(f"""
Intelligent SMS Filtering Assistant by China Mobile Communications Group Co.,Ltd
Analyzing your message...