Upload 5 files

Browse files
- .gitignore           +1   -0
- README.md            +64  -9
- gradio_app.py        +912 -0
- requirements.txt     +10  -0
- script_search_api.py +279 -0
.gitignore ADDED
@@ -0,0 +1 @@
+treat-env
README.md CHANGED
@@ -1,14 +1,69 @@
 ---
-title: TREAT
-emoji:
+title: TREAT-CHOCOSYRUP
+emoji: 🍩
 colorFrom: gray
-colorTo:
+colorTo: gray
 sdk: gradio
-sdk_version: 5.
-app_file:
-pinned:
-license: apache-2.0
-short_description: Find Triggering Content in Movies and TV with Llama and Flan
+sdk_version: "5.11.0"
+app_file: gradio_app.py
+pinned: true
 ---
 
-
+# 🍩 TREAT-CHOCOSYRUP: Trigger Recognition for Enjoyable and Appropriate Television
+
+TREAT-CHOCOSYRUP is a content analysis tool that identifies potentially sensitive content and trigger warnings in text and movie scripts. It pairs an AMOLED black interface and sweet-themed animations with both quick and detailed analysis options.
+
+## 🎯 Features
+
+### 1. 🍰 Quick Analysis (FLAN-T5)
+- Fast, efficient content screening
+- Uses the FLAN-T5 model for rapid analysis
+- Suited to quick content evaluation
+- Batch processing capability
+
+### 2. 🍬 Detailed Analysis (Llama)
+- In-depth content examination
+- Uses the Llama model for thorough analysis
+- More comprehensive trigger detection
+- Detailed confidence scoring
+
+### 3. 🍿 Movie Script Analysis
+- Search and analyze movie scripts
+- Automatic script retrieval and processing
+- Complete content warning analysis
+- Easy-to-use search interface
+
+## 🎨 Content Categories
+
+TREAT-CHOCOSYRUP analyzes content for a range of sensitive topics, including:
+- Violence
+- Death References
+- Substance Use
+- Gore
+- Sexual Content
+- Mental Health Issues
+- Self-Harm
+- Gun Use
+- Animal Cruelty
+- And more
+
+## 🚀 How It Works
+
+1. **Quick Analysis (FLAN-T5)**:
+   - Processes text in batches of overlapping chunks
+   - Uses prompt engineering to constrain answers to YES/NO/MAYBE
+   - Returns rapid results
+
+2. **Detailed Analysis (Llama)**:
+   - Deep content examination
+   - Context-aware analysis
+   - Thorough trigger detection
+   - Detailed confidence scoring
+
+3. **Movie Script Analysis**:
+   - Connects to the IMSDb script database
+   - Fetches and processes scripts
+   - Performs a comprehensive analysis
+   - Returns detailed trigger warnings
+
+The interface uses an AMOLED black theme with sweet-themed animations. Each analysis method is optimized for its use case, so users can choose between speed and depth of analysis.
gradio_app.py ADDED
@@ -0,0 +1,912 @@
import os
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
import torch
from datetime import datetime
import gradio as gr
from typing import Dict, List, Union, Optional
import logging
import traceback
import asyncio
import httpx
import subprocess
import atexit

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Start the API server
def start_api_server():
    # Start uvicorn in a subprocess
    process = subprocess.Popen(["uvicorn", "script_search_api:app", "--reload"])
    return process

# Stop the API server
def stop_api_server(process):
    process.terminate()

# Register the exit handler
api_process = start_api_server()
atexit.register(stop_api_server, api_process)

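# Editor's note (a sketch, not part of this commit): the UI assumes the sidecar
# API is accepting connections by the time the first movie search arrives. If
# the subprocess is still booting, that first request fails. A minimal
# readiness gate could poll FastAPI's built-in /docs page before continuing:
#
#   import time
#   def wait_for_api(url="http://localhost:8000/docs", timeout=30.0):
#       deadline = time.monotonic() + timeout
#       while time.monotonic() < deadline:
#           try:
#               httpx.get(url, timeout=2.0)  # any HTTP response means it's up
#               return
#           except httpx.TransportError:
#               time.sleep(0.5)  # server still booting; retry shortly
#       raise RuntimeError("script_search_api did not become ready in time")
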
class FlanT5Analyzer:
    """Fast and efficient analyzer using FLAN-T5"""
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        self.batch_size = 4
        self.trigger_categories = {
            "Violence": {
                "mapped_name": "Violence",
                "description": (
                    "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
                    "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., graphic threats or descriptions of injuries), "
                    "or large-scale events like wars, riots, or violent protests."
                )
            },
            "Death": {
                "mapped_name": "Death References",
                "description": (
                    "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, mentions of deceased individuals, "
                    "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
                    "grieving, or any dialogue that centers around death; do not count metaphors that don't actually involve death."
                )
            },
            "Substance_Use": {
                "mapped_name": "Substance Use",
                "description": (
                    "Any explicit reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
                    "This includes scenes of drug use, drinking, smoking, discussions about heavy substance abuse, or substance-related paraphernalia."
                )
            },
            "Gore": {
                "mapped_name": "Gore",
                "description": (
                    "Extremely detailed and graphic depictions of severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
                    "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail."
                )
            },
            "Sexual_Content": {
                "mapped_name": "Sexual Content",
                "description": (
                    "Any depiction of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
                    "This includes physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes."
                )
            },
            "Sexual_Abuse": {
                "mapped_name": "Sexual Abuse",
                "description": (
                    "Any form of non-consensual sexual act, behavior, or interaction involving coercion, manipulation, or physical force. "
                    "This includes incidents of sexual assault, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will."
                )
            },
            "Self_Harm": {
                "mapped_name": "Self-Harm",
                "description": (
                    "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
                    "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions."
                )
            },
            "Mental_Health": {
                "mapped_name": "Mental Health Issues",
                "description": (
                    "Any reference to extreme mental health struggles, disorders, or psychological distress. This includes depictions of depression, anxiety, PTSD, bipolar disorder, "
                    "or other conditions. Also includes toxic dynamics such as gaslighting or other forms of psychological abuse."
                )
            }
        }
        logger.info(f"Initialized FLAN-T5 analyzer with device: {self.device}")

    async def load_model(self, progress=None) -> None:
        """Load the model and tokenizer with progress updates."""
        try:
            if progress:
                progress(0.1, "🍩 Loading tokenizer...")

            self.tokenizer = AutoTokenizer.from_pretrained(
                "google/flan-t5-base",
                use_fast=True
            )

            if progress:
                progress(0.3, "🍰 Loading model...")

            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                "google/flan-t5-base",
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto"
            )

            if self.device == "cuda":
                self.model.eval()
                torch.cuda.empty_cache()

            if progress:
                progress(0.5, "🧁 Model loaded successfully")

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            raise

    def _chunk_text(self, text: str, chunk_size: int = 512, overlap: int = 30) -> List[str]:
        """Split text into overlapping chunks."""
        words = text.split()
        chunks = []
        for i in range(0, len(words), chunk_size - overlap):
            chunk = ' '.join(words[i:i + chunk_size])
            chunks.append(chunk)
        return chunks

    def _validate_response(self, response: str) -> str:
        """Validate and clean model response."""
        valid_responses = {"YES", "NO", "MAYBE"}
        response = response.strip().upper()
        first_word = response.split()[0] if response else "NO"
        return first_word if first_word in valid_responses else "NO"

    async def analyze_chunks_batch(
        self,
        chunks: List[str],
        progress: Optional[gr.Progress] = None,
        current_progress: float = 0,
        progress_step: float = 0
    ) -> Dict[str, float]:
        """Analyze multiple chunks in batches."""
        all_triggers = {}

        for category, info in self.trigger_categories.items():
            mapped_name = info["mapped_name"]
            description = info["description"]

            for i in range(0, len(chunks), self.batch_size):
                batch_chunks = chunks[i:i + self.batch_size]
                prompts = []

                for chunk in batch_chunks:
                    prompt = f"""
                    Task: Analyze if this text contains {mapped_name}.
                    Context: {description}
                    Text: "{chunk}"

                    Rules for analysis:
                    1. Only answer YES if there is clear, direct evidence
                    2. Answer NO if the content is ambiguous or metaphorical
                    3. Consider the severity and context

                    Answer with ONLY ONE word: YES, NO, or MAYBE
                    """
                    prompts.append(prompt)

                try:
                    inputs = self.tokenizer(
                        prompts,
                        return_tensors="pt",
                        padding=True,
                        truncation=True,
                        max_length=512
                    ).to(self.device)

                    with torch.no_grad():
                        outputs = self.model.generate(
                            **inputs,
                            max_new_tokens=20,
                            temperature=0.2,
                            top_p=0.85,
                            num_beams=3,
                            early_stopping=True,
                            pad_token_id=self.tokenizer.eos_token_id,
                            do_sample=True
                        )

                    responses = [
                        self.tokenizer.decode(output, skip_special_tokens=True)
                        for output in outputs
                    ]

                    for response in responses:
                        validated_response = self._validate_response(response)
                        if validated_response == "YES":
                            all_triggers[mapped_name] = all_triggers.get(mapped_name, 0) + 1
                        elif validated_response == "MAYBE":
                            all_triggers[mapped_name] = all_triggers.get(mapped_name, 0) + 0.5

                except Exception as e:
                    logger.error(f"Error processing batch for {mapped_name}: {str(e)}")
                    continue

                if progress:
                    current_progress += progress_step
                    progress(min(current_progress, 0.9), f"🍭 Analyzing {mapped_name}...")

        return all_triggers

    async def analyze_script(self, script: str, progress: Optional[gr.Progress] = None) -> List[str]:
        """Analyze the entire script."""
        if not self.model or not self.tokenizer:
            await self.load_model(progress)

        chunks = self._chunk_text(script)
        identified_triggers = await self.analyze_chunks_batch(
            chunks,
            progress,
            current_progress=0.5,
            progress_step=0.4 / (len(chunks) * len(self.trigger_categories))
        )

        if progress:
            progress(0.95, "🍫 Finalizing results...")

        final_triggers = []
        chunk_threshold = max(1, len(chunks) * 0.1)

        for mapped_name, count in identified_triggers.items():
            if count >= chunk_threshold:
                final_triggers.append(mapped_name)

        return final_triggers if final_triggers else ["None"]

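# Editor's note (a sketch, not part of this commit): the analyzer can be
# smoke-tested outside Gradio with asyncio, e.g.:
#
#   import asyncio
#   analyzer = FlanT5Analyzer()
#   triggers = asyncio.run(analyzer.analyze_script("Sample scene text goes here."))
#   print(triggers)  # e.g. ["None"] when nothing crosses the chunk threshold
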
class LlamaAnalyzer:
    """Detailed analyzer using Llama for thorough analysis"""
    def __init__(self):
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HF_TOKEN environment variable is not set!")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        logger.info(f"Initialized Llama analyzer with device: {self.device}")

    async def load_model(self, progress=None) -> None:
        """Load the model and tokenizer with progress updates and detailed logging."""
        try:
            print("\n=== Starting Llama Model Loading ===")
            print(f"Time: {datetime.now()}")

            if progress:
                progress(0.1, "🍩 Loading Llama tokenizer...")

            print("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                "meta-llama/Llama-3.2-3B",
                token=self.hf_token,  # the repo is gated, so the tokenizer needs the token too
                use_fast=True
            )

            if progress:
                progress(0.3, "🍰 Loading Llama model...")

            print(f"Loading model on {self.device}...")
            self.model = AutoModelForCausalLM.from_pretrained(
                "meta-llama/Llama-3.2-3B",
                token=self.hf_token,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto"
            )

            if progress:
                progress(0.5, "🧁 Llama model loaded successfully")

            print("Model and tokenizer loaded successfully")
            logger.info(f"Model loaded successfully on {self.device}")
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            print(f"\nERROR DURING MODEL LOADING: {str(e)}")
            print("Stack trace:")
            traceback.print_exc()
            raise

    def _chunk_text(self, text: str, chunk_size: int = 256, overlap: int = 15) -> List[str]:
        """Split text into overlapping character-based chunks for processing."""
        chunks = []
        for i in range(0, len(text), chunk_size - overlap):
            chunk = text[i:i + chunk_size]
            chunks.append(chunk)
        print(f"Split text into {len(chunks)} chunks with {overlap}-character overlap")
        return chunks

    async def analyze_chunk(
        self,
        chunk: str,
        trigger_categories: Dict,
        progress: Optional[gr.Progress] = None,
        current_progress: float = 0,
        progress_step: float = 0
    ) -> Dict[str, float]:
        """Analyze a single chunk of text for triggers with detailed logging."""
        chunk_triggers = {}
        print(f"\n--- Processing Chunk ---")
        print(f"Chunk text (preview): {chunk[:50]}...")

        for category, info in trigger_categories.items():
            mapped_name = info["mapped_name"]
            description = info["description"]

            print(f"\nAnalyzing for {mapped_name}...")
            prompt = f"""
            Check this text for any clear indication of {mapped_name} ({description}).
            Only say YES if you are confident; make sure the text is not metaphorical.
            Respond concisely and only with: YES, NO, or MAYBE.
            Text: {chunk}
            Answer:
            """

            try:
                print("Sending prompt to model...")
                inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

                with torch.no_grad():
                    print("Generating response...")
                    outputs = self.model.generate(
                        **inputs,
                        max_new_tokens=2,
                        do_sample=True,
                        temperature=0.3,
                        top_p=0.9,
                        pad_token_id=self.tokenizer.eos_token_id
                    )

                response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()
                first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"
                print(f"Model response for {mapped_name}: {first_word}")

                if first_word == "YES":
                    print(f"Detected {mapped_name} in this chunk!")
                    chunk_triggers[mapped_name] = chunk_triggers.get(mapped_name, 0) + 1
                elif first_word == "MAYBE":
                    print(f"Possible {mapped_name} detected, marking for further review.")
                    chunk_triggers[mapped_name] = chunk_triggers.get(mapped_name, 0) + 0.5
                else:
                    print(f"No {mapped_name} detected in this chunk.")

                if progress:
                    current_progress += progress_step
                    progress(min(current_progress, 0.9), f"🍭 Analyzing {mapped_name}...")

            except Exception as e:
                logger.error(f"Error analyzing chunk for {mapped_name}: {str(e)}")
                print(f"Error during analysis of {mapped_name}: {str(e)}")
                traceback.print_exc()

        return chunk_triggers

    async def analyze_script(self, script: str, progress: Optional[gr.Progress] = None) -> List[str]:
        """Analyze the entire script for triggers with progress updates and detailed logging."""
        print("\n=== Starting Script Analysis ===")
        print(f"Time: {datetime.now()}")

        if not self.model or not self.tokenizer:
            await self.load_model(progress)

        # Initialize trigger categories
        trigger_categories = {
            "Violence": {
                "mapped_name": "Violence",
                "description": (
                    "Any act of physical force meant to cause harm, injury, or death, including fights, threats, and large-scale violence like wars or riots."
                )
            },
            "Death": {
                "mapped_name": "Death References",
                "description": (
                    "Mentions or depictions of death, such as characters dying, references to deceased people, funerals, or mourning."
                )
            },
            "Substance Use": {
                "mapped_name": "Substance Use",
                "description": (
                    "Any reference to using or abusing drugs, alcohol, or other substances, including scenes of drinking, smoking, or drug use."
                )
            },
            "Gore": {
                "mapped_name": "Gore",
                "description": (
                    "Graphic depictions of severe injuries or mutilation, often with detailed blood, exposed organs, or dismemberment."
                )
            },
            "Vomit": {
                "mapped_name": "Vomit",
                "description": (
                    "Any explicit reference to vomiting or related actions. This includes only very specific mentions of nausea or the act of vomiting, with focus on direct description; only flag this if you are certain it is present."
                )
            },
            "Sexual Content": {
                "mapped_name": "Sexual Content",
                "description": (
                    "Depictions or mentions of sexual activity, intimacy, or behavior, including sexual themes like harassment or innuendo."
                )
            },
            "Sexual Abuse": {
                "mapped_name": "Sexual Abuse",
                "description": (
                    "Explicit non-consensual sexual acts, including assault, molestation, or harassment, and the emotional or legal consequences of such abuse. A stronger focus on detailed depictions or direct references to coercion or violence."
                )
            },
            "Self-Harm": {
                "mapped_name": "Self-Harm",
                "description": (
                    "Depictions or mentions of intentional self-injury, including acts like cutting, burning, or other self-destructive behavior. Emphasis on graphic or repeated actions, not implied or casual references."
                )
            },
            "Gun Use": {
                "mapped_name": "Gun Use",
                "description": (
                    "Explicit mentions of firearms in use, including threatening actions or accidents involving guns. Only triggers when the gun use is shown in a clear, violent context."
                )
            },
            "Animal Cruelty": {
                "mapped_name": "Animal Cruelty",
                "description": (
                    "Direct or explicit harm, abuse, or neglect of animals, including physical abuse or suffering, and actions performed for human entertainment or experimentation. Triggers only in clear, violent depictions."
                )
            },
            "Mental Health Issues": {
                "mapped_name": "Mental Health Issues",
                "description": (
                    "References to psychological struggles, such as depression, anxiety, or PTSD, including therapy or coping mechanisms."
                )
            }
        }

        chunks = self._chunk_text(script)
        identified_triggers = {}
        progress_step = 0.4 / (len(chunks) * len(trigger_categories))
        current_progress = 0.5  # Starting after model loading

        for chunk_idx, chunk in enumerate(chunks, 1):
            chunk_triggers = await self.analyze_chunk(
                chunk,
                trigger_categories,
                progress,
                current_progress,
                progress_step
            )

            for trigger, count in chunk_triggers.items():
                identified_triggers[trigger] = identified_triggers.get(trigger, 0) + count

        if progress:
            progress(0.95, "🍫 Finalizing detailed results...")

        print("\n=== Analysis Complete ===")
        print("Final Results:")
        final_triggers = []

        for mapped_name, count in identified_triggers.items():
            if count > 0.5:
                final_triggers.append(mapped_name)
                print(f"- {mapped_name}: found in {count} chunks")

        if not final_triggers:
            print("No triggers detected")
            final_triggers = ["None"]

        return final_triggers

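# Editor's note (a sketch, not part of this commit): the two analyzers gate
# results differently. FlanT5Analyzer keeps a category only if it fires in at
# least 10% of chunks (max(1, len(chunks) * 0.1)), while LlamaAnalyzer keeps
# anything scoring above 0.5 -- one YES (worth 1) anywhere suffices, but a
# single MAYBE (worth 0.5) does not. For example, with 30 chunks:
#
#   flan_keeps  = lambda count: count >= max(1, 30 * 0.1)  # needs >= 3 hits
#   llama_keeps = lambda count: count > 0.5                # one YES suffices
#   assert flan_keeps(3) and not flan_keeps(2)
#   assert llama_keeps(1.0) and not llama_keeps(0.5)
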
async def analyze_content_flant5(
    script: str,
    progress: Optional[gr.Progress] = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis function using FLAN-T5."""
    logger.info("Starting FLAN-T5 content analysis")

    analyzer = FlanT5Analyzer()

    try:
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "🎉 Analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            "model": "google/flan-t5-base",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        logger.info(f"Analysis complete: {result}")
        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": "google/flan-t5-base",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }


async def analyze_content_llama(
    script: str,
    progress: Optional[gr.Progress] = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis function using Llama for detailed analysis."""
    print("\n=== Starting Llama Content Analysis ===")
    print(f"Time: {datetime.now()}")

    analyzer = LlamaAnalyzer()

    try:
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "🎉 Detailed analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            "model": "Llama-3.2-3B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        print("\nFinal Result Dictionary:", result)
        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        print(f"\nERROR OCCURRED: {str(e)}")
        print("Stack trace:")
        traceback.print_exc()
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": "Llama-3.2-3B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }


async def search_movie_script(movie_name: str) -> Dict:
    """Search for and analyze a movie script using the API service."""
    async with httpx.AsyncClient() as client:
        try:
            # Start the analysis task. Routes match script_search_api.py:
            # GET /api/start_analysis kicks off the task,
            # GET /api/progress/{task_id} polls its state.
            response = await client.get(
                "http://localhost:8000/api/start_analysis",
                params={"movie_name": movie_name}
            )
            response.raise_for_status()
            task_data = response.json()
            task_id = task_data["task_id"]

            # Poll for results
            while True:
                status_response = await client.get(f"http://localhost:8000/api/progress/{task_id}")
                status_response.raise_for_status()
                status_data = status_response.json()

                if status_data["is_complete"]:
                    if status_data["error"]:
                        return {"error": status_data["error"]}
                    return status_data["result"]

                await asyncio.sleep(1)

        except Exception as e:
            return {"error": f"Error during movie search: {str(e)}"}

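# Editor's note (a sketch, not part of this commit): the polling loop above is
# unbounded, so a task that never completes leaves the Gradio handler hanging.
# A bounded variant might look like:
#
#   async def poll_with_timeout(client, task_id, max_seconds=600.0):
#       deadline = asyncio.get_running_loop().time() + max_seconds
#       while asyncio.get_running_loop().time() < deadline:
#           resp = await client.get(f"http://localhost:8000/api/progress/{task_id}")
#           resp.raise_for_status()
#           data = resp.json()
#           if data["is_complete"]:
#               return {"error": data["error"]} if data["error"] else data["result"]
#           await asyncio.sleep(1)
#       return {"error": f"Timed out after {max_seconds:.0f}s waiting for task {task_id}"}
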
# CSS animations for movie search tab
movie_search_html = """
<div style="text-align: center; padding: 15px;">
    <h3>🎬 Movie Script Analysis</h3>
    <p style="color: #ff9ff3;">Search and analyze movie scripts for content warnings</p>
</div>
"""

# Custom CSS for AMOLED black theme with sweets animations
custom_css = """
/* AMOLED Black Theme with Sweet Animations */
body, .gradio-container {
    background: linear-gradient(135deg, #000000 0%, #1a1a2e 50%, #16213e 100%) !important;
    color: #ffffff !important;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

/* Animated background with floating donuts */
.gradio-container::before {
    content: '';
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-image:
        radial-gradient(circle at 20% 80%, #ff6b6b22 0%, transparent 50%),
        radial-gradient(circle at 80% 20%, #4ecdc422 0%, transparent 50%),
        radial-gradient(circle at 40% 40%, #45b7d122 0%, transparent 50%);
    animation: float 20s ease-in-out infinite;
    pointer-events: none;
    z-index: -1;
}

@keyframes float {
    0%, 100% { transform: translateY(0px); }
    50% { transform: translateY(-20px); }
}

/* Donut loading animation */
@keyframes donut-spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

.loading-donut {
    display: inline-block;
    width: 40px;
    height: 40px;
    border: 4px solid #ff6b6b;
    border-radius: 50%;
    border-top-color: #4ecdc4;
    animation: donut-spin 1s ease-in-out infinite;
}

/* Progress bar styling */
.progress-bar {
    background: linear-gradient(45deg, #ff6b6b, #4ecdc4, #45b7d1) !important;
    border-radius: 20px !important;
    animation: progress-glow 2s ease-in-out infinite alternate;
}

@keyframes progress-glow {
    from { box-shadow: 0 0 5px #ff6b6b; }
    to { box-shadow: 0 0 20px #4ecdc4, 0 0 30px #45b7d1; }
}

/* Input and output styling */
.gr-textbox, .gr-json {
    background: rgba(255, 255, 255, 0.05) !important;
    border: 2px solid rgba(255, 107, 107, 0.3) !important;
    border-radius: 15px !important;
    backdrop-filter: blur(10px) !important;
    color: #ffffff !important;
    transition: all 0.3s ease !important;
}

.gr-textbox:focus, .gr-json:focus {
    border-color: #4ecdc4 !important;
    box-shadow: 0 0 15px rgba(78, 205, 196, 0.3) !important;
    transform: scale(1.02) !important;
}

/* Button styling */
.gr-button {
    background: linear-gradient(45deg, #ff6b6b, #4ecdc4) !important;
    border: none !important;
    border-radius: 25px !important;
    color: white !important;
    font-weight: bold !important;
    padding: 12px 24px !important;
    transition: all 0.3s ease !important;
    position: relative !important;
    overflow: hidden !important;
}

.gr-button::before {
    content: '';
    position: absolute;
    top: 50%;
    left: 50%;
    width: 0;
    height: 0;
    background: rgba(255, 255, 255, 0.2);
    border-radius: 50%;
    transform: translate(-50%, -50%);
    transition: width 0.6s, height 0.6s;
}

.gr-button:hover::before {
    width: 300px;
    height: 300px;
}

.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 10px 20px rgba(255, 107, 107, 0.3) !important;
}

/* Tab styling */
.gr-tab {
    background: rgba(255, 255, 255, 0.05) !important;
    border: 2px solid rgba(255, 107, 107, 0.3) !important;
    border-radius: 15px 15px 0 0 !important;
    color: #ffffff !important;
    transition: all 0.3s ease !important;
}

.gr-tab:hover, .gr-tab.selected {
    background: linear-gradient(45deg, rgba(255, 107, 107, 0.2), rgba(78, 205, 196, 0.2)) !important;
    border-color: #4ecdc4 !important;
    transform: translateY(-2px) !important;
}

/* Title styling */
h1, h2, h3 {
    background: linear-gradient(45deg, #ff6b6b, #4ecdc4, #45b7d1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    text-align: center;
    margin: 20px 0;
    animation: title-glow 3s ease-in-out infinite alternate;
}

@keyframes title-glow {
    from { filter: drop-shadow(0 0 5px rgba(255, 107, 107, 0.5)); }
    to { filter: drop-shadow(0 0 15px rgba(78, 205, 196, 0.5)); }
}

/* Sweet emoji animations */
.sweet-emoji {
    display: inline-block;
    animation: bounce 2s infinite;
}

@keyframes bounce {
    0%, 100% { transform: translateY(0); }
    50% { transform: translateY(-10px); }
}

/* Glassmorphism effect for containers */
.gr-group {
    background: rgba(255, 255, 255, 0.05) !important;
    backdrop-filter: blur(15px) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    border-radius: 20px !important;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3) !important;
}

/* JSON output special styling */
.gr-json pre {
    background: rgba(0, 0, 0, 0.5) !important;
    border: 1px solid rgba(78, 205, 196, 0.3) !important;
    border-radius: 10px !important;
    color: #4ecdc4 !important;
    font-family: 'Fira Code', monospace !important;
}

/* Progress text styling */
.progress-text {
    color: #4ecdc4 !important;
    font-weight: bold !important;
    text-shadow: 0 0 10px rgba(78, 205, 196, 0.5) !important;
}
"""

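# Editor's note: selectors such as .gr-button and .gr-tab above target
# Gradio's internal class names, which are not a stable API and have changed
# between major Gradio versions. Scoping rules to the elem_id values this app
# already assigns is more robust, e.g. (a sketch):
#
#   stable_css = """
#   #flant5-tab, #llama-tab, #movie-tab { border-radius: 15px 15px 0 0; }
#   #input-text-flant5 textarea { background: rgba(255, 255, 255, 0.05); }
#   """
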
if __name__ == "__main__":
    # Create the Gradio interface with tabs
    with gr.Blocks(
        title="🍩 TREAT-CHOCOSYRUP 🧁 Content Analysis",
        css=custom_css,
        theme=gr.themes.Base()
    ) as app:
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1 style="font-size: 3em; margin-bottom: 10px;">
                🍩 TREAT-CHOCOSYRUP 🧁
            </h1>
            <h2 style="font-size: 1.5em; margin-top: 0;">
                <span class="sweet-emoji">🍭</span> Content Analysis & Trigger Detection <span class="sweet-emoji">🍰</span>
            </h2>
            <p style="color: #4ecdc4; font-size: 1.1em; margin-top: 20px;">
                Analyze your content for sensitive topics with our sweet AI models
            </p>
        </div>
        """)

        with gr.Tabs():
            # Default FLAN-T5 Tab
            with gr.Tab("🍰 Quick Analysis (FLAN-T5)", elem_id="flant5-tab"):
                gr.HTML("""
                <div style="text-align: center; padding: 15px;">
                    <h3>🚀 Fast & Efficient Analysis</h3>
                    <p style="color: #ff9ff3;">Perfect for quick content screening with high accuracy</p>
                </div>
                """)

                # Input and analyze button for FLAN-T5
                with gr.Row():
                    text_input_flant5 = gr.Textbox(
                        lines=8,
                        label="Input Text",
                        placeholder="Enter your text here for analysis...",
                        elem_id="input-text-flant5"
                    )

                with gr.Row():
                    analyze_button_flant5 = gr.Button("🔍 Analyze Content", variant="primary")
                    clear_button_flant5 = gr.Button("🧹 Clear", variant="secondary")

                # Output area for FLAN-T5
                output_json_flant5 = gr.JSON(label="Analysis Results")

                # Button click events for FLAN-T5
                analyze_button_flant5.click(
                    fn=analyze_content_flant5,
                    inputs=[text_input_flant5],
                    outputs=[output_json_flant5]
                )
                clear_button_flant5.click(
                    fn=lambda: ("", None),
                    inputs=[],
                    outputs=[text_input_flant5, output_json_flant5]
                )

            # Detailed Llama Analysis Tab
            with gr.Tab("🍬 Detailed Analysis (Llama)", elem_id="llama-tab"):
                gr.HTML("""
                <div style="text-align: center; padding: 15px;">
                    <h3>🎯 Deep & Thorough Analysis</h3>
                    <p style="color: #4ecdc4;">Advanced analysis for comprehensive content evaluation</p>
                </div>
                """)

                # Input and analyze button for Llama
                with gr.Row():
                    text_input_llama = gr.Textbox(
                        lines=8,
                        label="Input Text",
                        placeholder="Enter your text here for detailed analysis...",
                        elem_id="input-text-llama"
                    )

                with gr.Row():
                    analyze_button_llama = gr.Button("🔍 Analyze Content (Detailed)", variant="primary")
                    clear_button_llama = gr.Button("🧹 Clear", variant="secondary")

                # Output area for Llama
                output_json_llama = gr.JSON(label="Detailed Analysis Results")

                # Button click events for Llama
                analyze_button_llama.click(
                    fn=analyze_content_llama,
                    inputs=[text_input_llama],
                    outputs=[output_json_llama]
                )
                clear_button_llama.click(
                    fn=lambda: ("", None),
                    inputs=[],
                    outputs=[text_input_llama, output_json_llama]
                )

            # Movie Search and Analysis Tab
            with gr.Tab("🍿 Movie Script Analysis", elem_id="movie-tab"):
                gr.HTML(movie_search_html)

                # Input for movie name
                with gr.Row():
                    movie_name_input = gr.Textbox(
                        lines=1,
                        label="Movie Name",
                        placeholder="Enter the movie name to search and analyze...",
                        elem_id="movie-name-input"
                    )

                # Analyze button for movie search
                with gr.Row():
                    search_button = gr.Button("🔍 Search & Analyze Movie", variant="primary")
                    clear_movie_button = gr.Button("🧹 Clear", variant="secondary")

                # Output area for movie analysis results
                output_movie_json = gr.JSON(label="Movie Analysis Results")

                # Button click events for movie search
                search_button.click(
                    fn=search_movie_script,
                    inputs=[movie_name_input],
                    outputs=[output_movie_json]
                )
                clear_movie_button.click(
                    fn=lambda: ("", None),
                    inputs=[],
                    outputs=[movie_name_input, output_movie_json]
                )

    app.queue()
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
flask
flask_cors
torch
gradio
transformers
accelerate
safetensors
huggingface-hub
beautifulsoup4
fastapi
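Editor's note: the two apps also import `uvicorn` and `httpx` (gradio_app.py) and `requests` and `pydantic` (script_search_api.py), none of which is listed directly; in practice they usually arrive transitively via `gradio`, `fastapi`, and `huggingface-hub`, while `flask` and `flask_cors` appear unused. Pinning direct imports explicitly is the safer pattern, e.g. appending:

uvicorn
httpx
requests
pydantic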
script_search_api.py ADDED
@@ -0,0 +1,279 @@
# script_search_api.py
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import asyncio
from datetime import datetime, timedelta
from typing import Dict, Optional
from pydantic import BaseModel
from dataclasses import dataclass
import logging
import requests
from bs4 import BeautifulSoup
from difflib import get_close_matches
from model.analyzer import analyze_content  # repo-local module; not part of this upload

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@dataclass
class ProgressState:
    progress: float
    status: str
    timestamp: datetime
    task_id: str
    is_complete: bool = False
    result: Optional[dict] = None
    error: Optional[str] = None

class ProgressResponse(BaseModel):
    progress: float
    status: str
    is_complete: bool
    result: Optional[dict] = None
    error: Optional[str] = None

# Global progress tracker
progress_tracker: Dict[str, ProgressState] = {}

BASE_URL = "https://imsdb.com"
ALL_SCRIPTS_URL = f"{BASE_URL}/all-scripts.html"

def create_task_id(movie_name: str) -> str:
    """Create a unique task ID for a movie analysis request"""
    return f"{movie_name}-{datetime.now().timestamp()}"

async def cleanup_old_tasks():
    """Remove tasks older than 1 hour"""
    while True:
        current_time = datetime.now()
        expired_tasks = [
            task_id for task_id, state in progress_tracker.items()
            if current_time - state.timestamp > timedelta(hours=1)
        ]
        for task_id in expired_tasks:
            del progress_tracker[task_id]
        await asyncio.sleep(300)  # Cleanup every 5 minutes

@app.on_event("startup")
async def startup_event():
    """Initialize the server and start cleanup task"""
    progress_tracker.clear()
    asyncio.create_task(cleanup_old_tasks())
    logger.info("Server started, progress tracker initialized")

def update_progress(task_id: str, progress: float, status: str, result: Optional[dict] = None, error: Optional[str] = None):
    """Update progress state for a task"""
    is_complete = progress >= 1.0
    progress_tracker[task_id] = ProgressState(
        progress=progress,
        status=status,
        timestamp=datetime.now(),
        task_id=task_id,
        is_complete=is_complete,
        result=result,
        error=error
    )
    logger.info(f"Task {task_id}: {status} (Progress: {progress * 100:.0f}%)")

@app.get("/api/start_analysis")
async def start_analysis(movie_name: str):
    """Start a new analysis task"""
    task_id = create_task_id(movie_name)
    update_progress(task_id, 0.0, "Starting analysis...")

    # Start the analysis task in the background
    asyncio.create_task(run_analysis(task_id, movie_name))

    return {"task_id": task_id}

@app.get("/api/progress/{task_id}")
async def get_progress(task_id: str) -> ProgressResponse:
    """Get current progress for a task"""
    if task_id not in progress_tracker:
        raise HTTPException(status_code=404, detail="Task not found")

    state = progress_tracker[task_id]
    return ProgressResponse(
        progress=state.progress,
        status=state.status,
        is_complete=state.is_complete,
        result=state.result,
        error=state.error
    )

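# Editor's note (a sketch, not part of this commit): the two routes above form
# a start-then-poll contract. A client session looks roughly like:
#
#   import time, httpx
#   task_id = httpx.get("http://localhost:8000/api/start_analysis",
#                       params={"movie_name": "Inception"}).json()["task_id"]  # example title
#   while True:
#       state = httpx.get(f"http://localhost:8000/api/progress/{task_id}").json()
#       print(f"{state['progress'] * 100:.0f}% - {state['status']}")
#       if state["is_complete"]:
#           break
#       time.sleep(1)
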
def find_movie_link(movie_name: str, soup: BeautifulSoup) -> str | None:
    """Find the closest matching movie link from the script database."""
    movie_links = {link.text.strip().lower(): link['href'] for link in soup.find_all('a', href=True)}
    close_matches = get_close_matches(movie_name.lower(), movie_links.keys(), n=1, cutoff=0.6)

    if close_matches:
        logger.info(f"Close match found: {close_matches[0]}")
        return BASE_URL + movie_links[close_matches[0]]

    logger.info("No close match found.")
    return None

def find_script_link(soup: BeautifulSoup, movie_name: str) -> str | None:
    """Find the script download link for a given movie."""
    patterns = [
        f'Read "{movie_name}" Script',
        f'Read "{movie_name.title()}" Script',
        f'Read "{movie_name.upper()}" Script',
        f'Read "{movie_name.lower()}" Script'
    ]

    for link in soup.find_all('a', href=True):
        link_text = link.text.strip()
        if any(pattern.lower() in link_text.lower() for pattern in patterns):
            return link['href']
        elif all(word.lower() in link_text.lower() for word in ["Read", "Script", movie_name]):
            return link['href']
    return None

def fetch_script(movie_name: str) -> str | None:
    """Fetch and extract the script content for a given movie."""
    # Note: progress entries here are keyed by movie_name rather than a task_id;
    # the /api/progress endpoint below looks entries up by the same key.
    # Initial page load
    update_progress(movie_name, 0.1, "Fetching the script database...")
    try:
        response = requests.get(ALL_SCRIPTS_URL)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the main page: {str(e)}")
        return None

    # Search for movie
    update_progress(movie_name, 0.2, "Searching for the movie...")
    soup = BeautifulSoup(response.text, 'html.parser')
    movie_link = find_movie_link(movie_name, soup)

    if not movie_link:
        logger.error(f"Script for '{movie_name}' not found.")
        return None

    # Fetch movie page
    update_progress(movie_name, 0.3, "Loading movie details...")
    try:
        response = requests.get(movie_link)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the movie page: {str(e)}")
        return None

    # Find script link
    update_progress(movie_name, 0.4, "Locating script download...")
    soup = BeautifulSoup(response.text, 'html.parser')
    script_link = find_script_link(soup, movie_name)

    if not script_link:
        logger.error(f"Unable to find script link for '{movie_name}'.")
        return None

    # Fetch script content
    script_page_url = BASE_URL + script_link
    update_progress(movie_name, 0.5, "Downloading script content...")

    try:
        response = requests.get(script_page_url)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load the script: {str(e)}")
        return None

    # Extract script text
    update_progress(movie_name, 0.6, "Extracting script text...")
    soup = BeautifulSoup(response.text, 'html.parser')
    script_content = soup.find('pre')

    if script_content:
        update_progress(movie_name, 0.7, "Script extracted successfully")
        return script_content.get_text()
    else:
        logger.error("Failed to extract script content.")
        return None

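# Editor's note (a sketch, not part of this commit): every requests.get call
# in fetch_script above runs without a timeout, so a stalled IMSDb response
# can block the task indefinitely. A small wrapper would bound each hop:
#
#   def safe_get(url: str, timeout: float = 15.0) -> requests.Response | None:
#       try:
#           response = requests.get(url, timeout=timeout)
#           response.raise_for_status()
#           return response
#       except requests.RequestException as e:
#           logger.error(f"Request to {url} failed: {e}")
#           return None
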
async def run_analysis(task_id: str, movie_name: str):
    """Run the actual analysis task"""
    try:
        # Fetch script
        update_progress(task_id, 0.2, "Fetching script...")
        script_text = fetch_script(movie_name)
        if not script_text:
            raise Exception("Script not found")

        # Analyze content
        update_progress(task_id, 0.6, "Analyzing content...")
        result = await analyze_content(script_text)

        # Complete
        update_progress(task_id, 1.0, "Analysis complete", result=result)

    except Exception as e:
        logger.error(f"Error in analysis: {str(e)}", exc_info=True)
        update_progress(task_id, 1.0, "Error occurred", error=str(e))

@app.get("/api/fetch_and_analyze")
async def fetch_and_analyze(movie_name: str):
    """Fetch and analyze a movie script, with progress tracking."""
    try:
        # Initialize progress
        task_id = create_task_id(movie_name)
        update_progress(task_id, 0.0, "Starting script search...")

        # Fetch script
        script_text = fetch_script(movie_name)
        if not script_text:
            raise HTTPException(status_code=404, detail="Script not found or error occurred")

        # Analyze content
        update_progress(task_id, 0.8, "Analyzing script content...")
        result = await analyze_content(script_text)

        # Finalize
        update_progress(task_id, 1.0, "Analysis complete!")
        return result

    except HTTPException:
        raise  # preserve the intended 404 instead of wrapping it in a 500
    except Exception as e:
        logger.error(f"Error in fetch_and_analyze: {str(e)}", exc_info=True)
        # Clean up progress tracker in case of error (fetch_script keys entries by movie name)
        if movie_name in progress_tracker:
            del progress_tracker[movie_name]
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")

@app.get("/api/progress")
def get_progress_by_movie(movie_name: str):  # renamed so it no longer shadows get_progress above
    """Get the current progress and status for a movie analysis."""
    if movie_name not in progress_tracker:
        return {
            "progress": 0,
            "status": "Waiting to start..."
        }

    progress_info = progress_tracker[movie_name]

    # Clean up old entries (optional)
    current_time = datetime.now()
    if (current_time - progress_info.timestamp).total_seconds() > 3600:  # 1 hour timeout
        del progress_tracker[movie_name]
        return {
            "progress": 0,
            "status": "Session expired. Please try again."
        }

    return {
        "progress": progress_info.progress,
        "status": progress_info.status
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
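For completeness, the API's one-shot route can be exercised without the task/polling machinery once the server is running locally (a sketch; "The Matrix" is just an example title):

import httpx

# Blocking one-shot analysis; long scripts take a while, so allow a generous timeout.
result = httpx.get(
    "http://localhost:8000/api/fetch_and_analyze",
    params={"movie_name": "The Matrix"},
    timeout=600.0,
).json()
print(result)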