# app.py - Optimized for Hugging Face Spaces
import gradio as gr
import os
import logging
import tempfile
import numpy as np
from typing import List, Dict, Any, Optional, Tuple
import warnings

warnings.filterwarnings("ignore")

# For Spaces, we need to handle optional imports gracefully
try:
    import plotly.graph_objects as go
    import plotly.express as px
    from sklearn.metrics.pairwise import cosine_similarity
    from sklearn.decomposition import PCA
    ADVANCED_VIZ = True
except ImportError:
    ADVANCED_VIZ = False
    print("⚠️ Advanced visualization not available - install plotly and scikit-learn")

try:
    import scipy.io.wavfile
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
    print("⚠️ Audio processing limited - scipy not available")

# ══════════════════════════════════════════════════════════════════════════════
# 🚀 HUGGING FACE SPACES OPTIMIZED AI RESEARCH DEMO
# ══════════════════════════════════════════════════════════════════════════════

try:
    from huggingface_hub import InferenceClient
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False
    print("❌ HuggingFace Hub not available")

# Spaces-optimized logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# ══════════════════════════════════════════════════════════════════════════════
# 🔧 SPACES-OPTIMIZED CLIENT INITIALIZATION
# ══════════════════════════════════════════════════════════════════════════════

def get_client():
    """Spaces-optimized client initialization using the official HF Inference provider."""
    if not HF_AVAILABLE:
        logger.error("❌ HuggingFace Hub not available")
        return None

    # Debug: check the environment for tokens
    logger.info("🔍 Debugging token detection...")

    # Method 1: HF_API_TOKEN (the custom secret name used by this Space)
    api_token = os.getenv("HF_API_TOKEN")
    if api_token:
        logger.info(f"✅ Found HF_API_TOKEN (length: {len(api_token)})")
    else:
        logger.warning("❌ HF_API_TOKEN not found in environment")

    # Method 2: fall back to the conventional HF_TOKEN variable
    if not api_token:
        api_token = os.getenv("HF_TOKEN")
        if api_token:
            logger.info(f"✅ Found HF_TOKEN (length: {len(api_token)})")
        else:
            logger.warning("❌ HF_TOKEN not found in environment")

    # Method 3: Hugging Face CLI token (if logged in)
    if not api_token:
        try:
            from huggingface_hub import get_token
            api_token = get_token()
            if api_token:
                logger.info(f"✅ Found CLI token (length: {len(api_token)})")
        except Exception as e:
            logger.warning(f"❌ CLI token check failed: {e}")

    if not api_token:
        logger.error("❌ No HF token found in any location")
        return None

    # Validate token format (user access tokens start with 'hf_')
    if not api_token.startswith('hf_'):
        logger.warning(f"⚠️ Token doesn't start with 'hf_': {api_token[:10]}...")
        return None

    try:
        logger.info("🔄 Initializing HuggingFace client with hf-inference provider...")

        # Use the official provider-based approach
        client = InferenceClient(
            provider="hf-inference",
            api_key=api_token,
        )

        # Test connectivity with the official fill_mask example from the docs
        logger.info("🧪 Testing client connectivity...")
        try:
            test_result = client.fill_mask(
                "The answer to the universe is [MASK].",
                model="google-bert/bert-base-uncased",
            )
            logger.info(f"✅ Client test successful - got {len(test_result)} results")
        except Exception:
            try:
                # Fallback test with text classification
                client.text_classification(
                    "I like you. I love you",
                    model="NousResearch/Minos-v1",
                )
                logger.info("✅ Client test successful with text classification")
            except Exception:
                # If both tests fail the client may still work - the models
                # might simply be loading, so just log and continue
                logger.info("✅ Client initialized (model tests may be loading)")

        logger.info("✅ HuggingFace client ready for use")
        return client

    except Exception as e:
        logger.error(f"❌ Client initialization failed: {e}")
        return None

# Initialize client globally for Spaces
CLIENT = get_client()
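# A minimal sanity check outside the Gradio UI (a sketch, not part of the app;
# it assumes a valid token is already exported as HF_API_TOKEN or HF_TOKEN):
#
#   from huggingface_hub import InferenceClient
#   client = InferenceClient(provider="hf-inference", api_key=os.environ["HF_API_TOKEN"])
#   print(client.fill_mask("Paris is the capital of [MASK].",
#                          model="google-bert/bert-base-uncased")[0])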
# ══════════════════════════════════════════════════════════════════════════════
# 🛡️ SPACES-OPTIMIZED ERROR HANDLING
# ══════════════════════════════════════════════════════════════════════════════

def safe_call(func_name: str, func, *args, **kwargs):
    """Spaces-optimized safe API call wrapper."""
    if not CLIENT:
        return "❌ API client not available. Please check if HF_API_TOKEN or HF_TOKEN is set in Spaces secrets."

    try:
        logger.info(f"🔄 {func_name}...")
        result = func(*args, **kwargs)
        logger.info(f"✅ {func_name} completed")
        return result
    except Exception as e:
        error_msg = str(e)
        logger.error(f"❌ {func_name} failed: {error_msg}")

        # Spaces-specific error handling
        if "429" in error_msg or "rate limit" in error_msg.lower():
            return "❌ Rate limit reached. Please wait a moment and try again."
        elif "503" in error_msg or "service unavailable" in error_msg.lower():
            return "❌ Service temporarily unavailable. Please try again in a few moments."
        elif "unauthorized" in error_msg.lower():
            return "❌ Authentication failed. Please check HF_TOKEN in Spaces settings."
        elif "timeout" in error_msg.lower():
            return "❌ Request timed out. The model might be loading. Please try again."
        else:
            return f"❌ Error: {error_msg}"

# ══════════════════════════════════════════════════════════════════════════════
# 🎯 CORE AI FUNCTIONS - SPACES OPTIMIZED
# ══════════════════════════════════════════════════════════════════════════════

def run_chat(message):
    """Spaces-optimized chat function."""
    if not message or str(message).strip() == "":
        return "❌ Please enter a message"

    def chat_call():
        clean_message = str(message).strip()
        messages = [{"role": "user", "content": clean_message}]
        # Use a smaller, faster model for Spaces. Note: DialoGPT is a plain
        # text-generation model, so if the chat-completions route rejects it,
        # swap in a conversational model the provider currently serves.
        completion = CLIENT.chat.completions.create(
            model="microsoft/DialoGPT-medium",
            messages=messages,
            max_tokens=100  # Reduced for Spaces
        )
        return completion.choices[0].message.content

    return safe_call("Chat", chat_call)

def run_fill_mask(text):
    """Spaces-optimized fill-mask function."""
    if not text or str(text).strip() == "" or "[MASK]" not in str(text):
        return "❌ Please enter text with [MASK]"

    def fill_mask_call():
        # Try bert-base-uncased first, then fall back to distilbert
        try:
            result = CLIENT.fill_mask(str(text).strip(), model="bert-base-uncased")
        except Exception:
            result = CLIENT.fill_mask(str(text).strip(), model="distilbert-base-uncased")

        if isinstance(result, list):
            output = "🎭 **Predictions:**\n"
            for i, pred in enumerate(result[:3], 1):  # Top 3 for Spaces
                token = pred.get("token_str", "").strip()
                score = pred.get("score", 0)
                output += f"{i}. **{token}** ({score:.3f})\n"
            return output
        return str(result)

    return safe_call("Fill Mask", fill_mask_call)
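# safe_call also wraps ad-hoc closures, which is handy for quick experiments
# (illustrative input; model availability on the serverless API varies):
#
#   safe_call(
#       "Sentiment demo",
#       lambda: CLIENT.text_classification(
#           "I love this!",
#           model="distilbert-base-uncased-finetuned-sst-2-english",
#       ),
#   )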
def run_question_answering(question, context):
    """Spaces-optimized Q&A function."""
    if not question or not context:
        return "❌ Please provide both question and context"

    def qa_call():
        # Pass question and context as keyword arguments
        result = CLIENT.question_answering(
            question=str(question).strip(),
            context=str(context).strip(),
            model="distilbert-base-cased-distilled-squad"
        )
        if isinstance(result, dict):
            answer = result.get('answer', 'No answer found')
            score = result.get('score', 0)
            return f"💡 **Answer:** {answer}\n📊 **Confidence:** {score:.3f}"
        return f"💡 **Answer:** {str(result)}"

    return safe_call("Question Answering", qa_call)

def run_summarization(text):
    """Spaces-optimized summarization."""
    if not text or len(str(text).strip().split()) < 10:
        return "❌ Please enter text with at least 10 words"

    def summarization_call():
        result = CLIENT.summarization(str(text).strip(), model="facebook/bart-large-cnn")
        if isinstance(result, list) and result:
            summary = result[0].get('summary_text', str(result[0]))
        elif isinstance(result, dict):
            summary = result.get('summary_text', str(result))
        else:
            summary = str(result)
        return f"📝 **Summary:** {summary}"

    return safe_call("Summarization", summarization_call)

def run_text_classification(text):
    """Spaces-optimized text classification."""
    if not text or str(text).strip() == "":
        return "❌ Please enter text to classify"

    def classification_call():
        # Try multiple models for better reliability
        models_to_try = [
            "cardiffnlp/twitter-roberta-base-sentiment-latest",
            "distilbert-base-uncased-finetuned-sst-2-english",
            "cardiffnlp/twitter-roberta-base-sentiment"
        ]

        result = None
        for model in models_to_try:
            try:
                result = CLIENT.text_classification(str(text).strip(), model=model)
                break
            except Exception as e:
                logger.warning(f"Model {model} failed: {e}")
                continue

        if result is None:
            return "❌ All sentiment models unavailable. Please try again later."

        if isinstance(result, list):
            output = "🏷️ **Sentiment Analysis:**\n"
            for i, pred in enumerate(result[:3], 1):
                label = pred.get("label", "Unknown")
                score = pred.get("score", 0)
                # Clean up label names
                clean_label = label.replace("LABEL_", "").replace("_", " ").title()
                output += f"{i}. **{clean_label}** ({score:.3f})\n"
            return output
        return str(result)

    return safe_call("Text Classification", classification_call)

def run_zero_shot_classification(text, labels):
    """Spaces-optimized zero-shot classification."""
    if not text or not labels:
        return "❌ Please provide text and labels"

    clean_labels = [l.strip() for l in str(labels).split(",") if l.strip()]
    if not clean_labels:
        return "❌ Please provide valid labels separated by commas"

    def zero_shot_call():
        result = CLIENT.zero_shot_classification(
            str(text).strip(),
            candidate_labels=clean_labels,
            model="facebook/bart-large-mnli"
        )
        if isinstance(result, dict):
            labels_result = result.get('labels', [])
            scores = result.get('scores', [])
            output = "🎯 **Zero-Shot Classification:**\n"
            for i, (label, score) in enumerate(zip(labels_result[:3], scores[:3]), 1):
                output += f"{i}. **{label}** ({score:.3f})\n"
            return output
        return str(result)

    return safe_call("Zero-Shot Classification", zero_shot_call)
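# Illustrative call (hypothetical inputs; the exact response shape depends on
# the installed huggingface_hub version, hence the defensive parsing above):
#
#   run_zero_shot_classification(
#       "The new GPU doubles training throughput",
#       "technology, sports, politics",
#   )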
def run_token_classification(text):
    """Spaces-optimized NER function."""
    if not text or str(text).strip() == "":
        return "❌ Please enter text for entity recognition"

    def ner_call():
        result = CLIENT.token_classification(str(text).strip(), model="dslim/bert-base-NER")

        if isinstance(result, list):
            output = "🏷️ **Named Entities:**\n"
            entities = []
            for entity in result:
                word = entity.get("word", "Unknown")
                # Aggregated results use "entity_group"; raw results use "entity"
                label = entity.get("entity_group", entity.get("entity", "UNKNOWN"))
                score = entity.get("score", 0)

                # Skip subword tokens
                if word.startswith("##"):
                    continue

                if score > 0.5:  # Only high-confidence entities
                    entities.append(f"**{word}** → {label} ({score:.3f})")

            # Remove duplicates and limit results
            unique_entities = list(dict.fromkeys(entities))[:8]
            if unique_entities:
                for i, entity in enumerate(unique_entities, 1):
                    output += f"{i}. {entity}\n"
                return output
            return "No high-confidence entities found."
        return str(result)

    return safe_call("Named Entity Recognition", ner_call)

def run_translation(text):
    """Spaces-optimized translation."""
    if not text or str(text).strip() == "":
        return "❌ Please enter text to translate"

    def translation_call():
        result = CLIENT.translation(str(text).strip(), model="Helsinki-NLP/opus-mt-en-fr")
        if isinstance(result, list) and result:
            translation = result[0].get('translation_text', str(result[0]))
        elif isinstance(result, dict):
            translation = result.get('translation_text', str(result))
        else:
            translation = str(result)
        return f"🌐 **Translation (EN→FR):** {translation}"

    return safe_call("Translation", translation_call)

def run_feature_extraction_basic(text1, text2=None):
    """Spaces-optimized feature extraction with optional comparison."""
    if not text1 or str(text1).strip() == "":
        return "❌ Please enter text for analysis"

    def feature_extraction_call():
        # Get embeddings. feature_extraction may return a nested list or a
        # numpy array depending on the huggingface_hub version, so normalize
        # with numpy rather than assuming a list.
        result1 = CLIENT.feature_extraction(str(text1).strip(), model="sentence-transformers/all-MiniLM-L6-v2")
        embedding1 = np.asarray(result1)
        if embedding1.ndim > 1:
            embedding1 = embedding1[0]
        if embedding1.size == 0:
            return "❌ Failed to extract features"

        output = "🧮 **Feature Analysis:**\n"
        output += f"📏 **Vector Dimension:** {len(embedding1)}\n"
        output += f"🔢 **Sample Values:** {embedding1[:5].round(4).tolist()}...\n\n"

        # Optional comparison
        if text2 and str(text2).strip():
            result2 = CLIENT.feature_extraction(str(text2).strip(), model="sentence-transformers/all-MiniLM-L6-v2")
            embedding2 = np.asarray(result2)
            if embedding2.ndim > 1:
                embedding2 = embedding2[0]
            if embedding2.size:
                # Cosine similarity; fall back to plain numpy when scikit-learn
                # (imported only when ADVANCED_VIZ is True) is unavailable
                if ADVANCED_VIZ:
                    similarity = cosine_similarity([embedding1], [embedding2])[0][0]
                else:
                    denom = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
                    similarity = float(np.dot(embedding1, embedding2) / denom) if denom else 0.0

                output += "🔍 **Comparison Results:**\n"
                output += f"📊 **Cosine Similarity:** {similarity:.4f}\n"

                if similarity > 0.8:
                    output += "✅ **Highly Similar Texts**\n"
                elif similarity > 0.5:
                    output += "🟡 **Moderately Similar Texts**\n"
                else:
                    output += "🔴 **Different Semantic Meanings**\n"

        return output

    return safe_call("Feature Extraction", feature_extraction_call)
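# Example comparison (illustrative inputs; all-MiniLM-L6-v2 produces
# 384-dimensional sentence embeddings):
#
#   run_feature_extraction_basic("A cat sits on the mat.",
#                                "A kitten rests on the rug.")
#   # -> reports the vector dimension, sample values, and a cosine-similarity
#   #    verdict such as "Highly Similar Texts"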
"Unknown") score = pred.get("score", 0) output += f"{i}. **{label}** ({score:.1%})\n" return output return str(result) return safe_call("Image Classification", image_classification_call) def run_text_to_image_spaces(prompt): """Spaces-optimized text-to-image generation.""" if not prompt or str(prompt).strip() == "": return None, "❌ Please enter a prompt" def text_to_image_call(): # Use a fast model suitable for Spaces image = CLIENT.text_to_image(str(prompt).strip(), model="runwayml/stable-diffusion-v1-5") status = f"🎨 **Generated!** Prompt: {str(prompt).strip()[:50]}..." return image, status try: return safe_call("Text to Image", text_to_image_call) except Exception as e: return None, f"❌ Generation failed: {str(e)}" # ══════════════════════════════════════════════════════════════════════════════ # 🎨 SPACES-OPTIMIZED GRADIO INTERFACE # ══════════════════════════════════════════════════════════════════════════════ # Custom CSS for better Spaces appearance custom_css = """ .gradio-container { max-width: 1200px !important; } .main-header { text-align: center; background: linear-gradient(90deg, #ff6b6b, #4ecdc4); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 2.5em; margin: 20px 0; } .status-indicator { padding: 10px; border-radius: 5px; margin: 10px 0; } .status-connected { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; } .status-error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; } """ with gr.Blocks(title="🚀 AI Research Hub", theme=gr.themes.Soft(), css=custom_css) as demo: # Header gr.HTML("""
        Optimized for Hugging Face Spaces