Spaces:
Sleeping
Sleeping
Delete app.py
Browse files
app.py
DELETED
@@ -1,262 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import spaces
|
3 |
-
import torch
|
4 |
-
from transformers import AutoProcessor, Llama4ForConditionalGeneration
|
5 |
-
import fitz # PyMuPDF for PDF processing
|
6 |
-
import docx
|
7 |
-
from io import BytesIO
|
8 |
-
import os
|
9 |
-
from PIL import Image
|
10 |
-
import base64
|
11 |
-
|
12 |
-
# Initialize the model and processor
# Hugging Face repo id of the multimodal model used throughout this app.
model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

# Global variables to store model and processor
# Both start as None and are filled lazily by load_model(), so importing
# this module does not trigger the multi-GB weight download.
model = None
processor = None
|
18 |
-
|
19 |
-
def load_model():
    """Lazily load and cache the Llama 4 model and processor.

    Returns:
        tuple: ``(model, processor)`` — the cached global instances.
    """
    global model, processor
    # Guard BOTH globals: the original checked only `model`, so a failure
    # between the two from_pretrained() calls (or any code path that set
    # one global without the other) could return a None processor forever.
    if model is None or processor is None:
        processor = AutoProcessor.from_pretrained(model_id)
        model = Llama4ForConditionalGeneration.from_pretrained(
            model_id,
            device_map="auto",  # shard layers across available devices
            torch_dtype=torch.bfloat16,
            attn_implementation="flex_attention"
        )
    return model, processor
|
31 |
-
|
32 |
-
# Define the six prompt templates.
# Each template takes a single `{content}` placeholder filled with the
# extracted document text. Numeric ranges previously contained mojibake
# ("150β200") where an en dash ("150–200") was mangled by an encoding
# round-trip; restored here since the model reads these strings verbatim.
PROMPTS = {
    "summaries": """Write a concise summary of the provided document. The summary should be 150–200 words, capturing the main ideas, key arguments, or findings, and central themes. Exclude minor details or examples unless critical to the core message. Use clear, neutral language, structuring the summary with an introductory sentence stating the document's purpose, followed by main points in a logical order. Conclude with a brief statement on the document's significance or broader implications. Ensure accuracy by directly referencing the document's content and avoid personal opinions.

Document content:
{content}""",

    "outlines": """Create a detailed outline of the provided document. The outline should feature a clear hierarchy with main sections, subsections, and bullet points summarizing key concepts, arguments, or findings under each. Use Roman numerals for main sections and letters or numbers for subsections. Include a brief introductory statement (50–100 words) describing the document's scope and purpose. Ensure the outline is comprehensive, logically organized, and captures all major points from the document without extraneous details.

Document content:
{content}""",

    "analysis": """Provide a critical analysis of the provided document, focusing on its main arguments, evidence, or methodology. The analysis should be 300–400 words, evaluating strengths and weaknesses, the author's assumptions, biases, or rhetorical strategies, and the document's relevance or impact in its field or broader context. Support your points with direct evidence from the document, such as quotes or data. Organize the analysis with an introduction, body paragraphs (strengths, weaknesses, implications), and a conclusion. Maintain an objective tone, avoiding excessive summarization, and ensure all claims are grounded in the document's content.

Document content:
{content}""",

    "study_guides": """Develop a comprehensive study guide for the provided document. The guide should include: (1) a brief overview (50–100 words) of the document's scope and purpose, (2) a list of 5–7 key themes or concepts with concise explanations (50–100 words each), (3) a glossary of 10–15 essential terms from the document with clear definitions, (4) 3–5 critical discussion questions to encourage deeper thinking, and (5) a checklist of key takeaways or study tips. Organize the guide with clear headings and bullet points, ensuring it is student-friendly and focused on retention and understanding.

Document content:
{content}""",

    "tables": """Create a comparative table based on the provided document, synthesizing key elements (e.g., theories, findings, arguments, or concepts) across relevant criteria (e.g., assumptions, applications, strengths, weaknesses). The table should compare 3–5 elements, using rows for each element and columns for each criterion. Include a brief introductory paragraph (50–100 words) explaining the table's purpose and scope. Ensure the table is concise, visually organized, and populated with precise information directly from the document.

Document content:
{content}""",

    "questions": """Generate a set of 10 high-quality questions based on the provided document. Include: (1) 3 factual questions to test recall of key details, (2) 3 conceptual questions to assess understanding of main ideas or arguments, (3) 2 analytical questions to encourage critical thinking about the document's implications or weaknesses, and (4) 2 open-ended questions to prompt discussion or creative reflection. Label each question by type (factual, conceptual, analytical, open-ended), and ensure questions are clear, specific, and aligned with the document's core themes.

Document content:
{content}"""
}
|
64 |
-
|
65 |
-
# Enhanced document processing functions
def extract_text_and_images_from_pdf(file_path):
    """Extract page text and embedded raster images from a PDF.

    Args:
        file_path: Path of the PDF file on disk.

    Returns:
        tuple[str, list]: concatenated text with per-page markers, and a
        list of PIL images; per-image failures are noted inline in the text.
    """
    doc = fitz.open(file_path)
    text = ""
    images = []

    try:
        for page_num in range(len(doc)):
            page = doc[page_num]

            # Page marker lets the LLM (and the user) attribute content.
            text += f"\n--- Page {page_num + 1} ---\n"
            text += page.get_text()

            # Extract embedded images
            for img_index, img in enumerate(page.get_images()):
                try:
                    xref = img[0]
                    pix = fitz.Pixmap(doc, xref)

                    if pix.n - pix.alpha < 4:  # GRAY or RGB (skip CMYK etc.)
                        # Map to a real PIL mode. The previous code passed
                        # pix.colorspace.name ("DeviceRGB"/"DeviceGray"),
                        # which PIL rejects, so every image hit the error
                        # branch. "RGB"/"L" (+"A" when an alpha channel is
                        # present) matches the pixmap's sample layout.
                        base_mode = "RGB" if pix.n - pix.alpha == 3 else "L"
                        mode = base_mode + ("A" if pix.alpha else "")
                        img_pil = Image.frombytes(mode, [pix.width, pix.height], pix.samples)
                        images.append(img_pil)
                        text += f"\n[IMAGE {len(images)} extracted from page {page_num + 1}]\n"

                    pix = None  # free memory
                except Exception as e:
                    # Best-effort: record the failure inline and keep going.
                    text += f"\n[IMAGE ERROR: Could not process image {img_index + 1} on page {page_num + 1}: {str(e)}]\n"
    finally:
        # Always release the document handle, even if extraction raises;
        # the original leaked it on any error outside the inner try.
        doc.close()

    return text, images
|
100 |
-
|
101 |
-
def extract_text_from_docx(file_path):
    """Return the paragraph text of a DOCX file and an empty image list."""
    document = docx.Document(file_path)
    # One trailing newline per paragraph, mirroring plain-text layout.
    body = "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
    return body, []
|
108 |
-
|
109 |
-
def extract_text_from_txt(file_path):
    """Read a UTF-8 text file and return (contents, empty image list)."""
    with open(file_path, encoding='utf-8') as handle:
        contents = handle.read()
    return contents, []
|
113 |
-
|
114 |
-
def process_document(file):
    """Route an uploaded file to the extractor matching its extension.

    Returns:
        tuple[str, list]: (text, images). For unsupported types or
        extraction failures the text slot carries an error message.
    """
    # No upload at all: nothing to extract.
    if file is None:
        return "", []

    path = file.name
    extension = os.path.splitext(path)[1].lower()

    try:
        if extension == '.pdf':
            return extract_text_and_images_from_pdf(path)
        if extension == '.docx':
            return extract_text_from_docx(path)
        if extension == '.txt':
            return extract_text_from_txt(path)
        return "Unsupported file format. Please upload PDF, DOCX, or TXT files.", []
    except Exception as e:
        # Surface the failure as content so the UI shows something useful.
        return f"Error processing file: {str(e)}", []
|
133 |
-
|
134 |
-
# AI processing function with ZeroGPU
@spaces.GPU
def generate_content(text_input, file_input, task_type):
    """Generate content with Llama 4 for the selected task.

    Parameters
    ----------
    text_input : str
        Text pasted by the user; used only when no file is uploaded.
    file_input
        Uploaded file object (or None); takes precedence over text_input.
    task_type : str
        Key into PROMPTS; unknown keys fall back to "summaries".

    Returns
    -------
    str
        The model's decoded reply, or a human-readable error message.
    """
    # Load model (lazy; cached in module globals after the first call)
    model, processor = load_model()

    # Get text content and images — an uploaded file wins over pasted text.
    if file_input is not None:
        content, images = process_document(file_input)
    else:
        content = text_input
        images = []

    if not content:
        return "Please provide text input or upload a file."

    # Get the appropriate prompt and inject the document text.
    prompt_template = PROMPTS.get(task_type, PROMPTS["summaries"])
    prompt = prompt_template.format(content=content)

    try:
        # Prepare multimodal input: text first, then any images.
        message_content = [{"type": "text", "text": prompt}]

        # Add images if available
        for i, img in enumerate(images[:5]):  # Limit to 5 images
            # Convert PIL image to a base64 data URL for the processor.
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            img_b64 = base64.b64encode(buffered.getvalue()).decode()
            img_url = f"data:image/png;base64,{img_b64}"
            message_content.append({"type": "image", "url": img_url})

        messages = [{"role": "user", "content": message_content}]

        # Process with the multimodal processor (chat template + tokenize),
        # then move the tensors to wherever the sharded model lives.
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to(model.device)

        # Generate response (sampling; no grad needed for inference).
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=2048,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.1
            )

        # Decode only the newly generated tokens: slice off the prompt
        # portion (everything up to the input length) before decoding.
        response = processor.batch_decode(
            outputs[:, inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True
        )[0]

        return response

    except Exception as e:
        # Broad catch on purpose: any failure is shown in the output box
        # instead of crashing the Gradio worker.
        return f"Error generating content: {str(e)}"
|
200 |
-
|
201 |
-
# Main Gradio interface
# NOTE(review): several label strings below contain mojibake (e.g. "π",
# "β¨", "β") that were presumably emoji before an encoding round-trip.
# They are left byte-identical here; confirm the intended characters
# before changing any user-visible text.
with gr.Blocks(title="Document Study Assistant", theme=gr.themes.Soft()) as app:
    gr.Markdown("# π Document Study Assistant")
    gr.Markdown("Upload documents or paste text to generate summaries, outlines, analysis, study guides, tables, and questions using Llama 4.")
    gr.Markdown("**β¨ Now with full multimodal support!** PDFs with images will be analyzed completely.")

    # Two-column layout: inputs/controls on the left, output on the right.
    with gr.Row():
        with gr.Column(scale=1):
            # Input section: pasted text OR an uploaded file (file wins —
            # see generate_content).
            gr.Markdown("### Input")
            text_input = gr.Textbox(
                label="Paste text here",
                placeholder="Paste your text content here...",
                lines=10,
                max_lines=20
            )

            gr.Markdown("**OR**")

            file_input = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single"
            )

            # Task selection — radio values are the PROMPTS dict keys.
            gr.Markdown("### Select Task")
            task_type = gr.Radio(
                choices=[
                    ("π Summaries", "summaries"),
                    ("π Outlines", "outlines"),
                    ("π Analysis", "analysis"),
                    ("π Study Guides", "study_guides"),
                    ("π Tables", "tables"),
                    ("β Questions", "questions")
                ],
                value="summaries",
                label="Choose what to generate"
            )

            generate_btn = gr.Button("π Generate", variant="primary", size="lg")

        with gr.Column(scale=1):
            # Output section: read-only textbox with a copy button.
            gr.Markdown("### Output")
            output = gr.Textbox(
                label="Generated Content",
                lines=25,
                max_lines=50,
                show_copy_button=True
            )

    # Connect the generate button to the GPU-backed handler.
    generate_btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, task_type],
        outputs=output,
        show_progress=True
    )
|
260 |
-
|
261 |
-
if __name__ == "__main__":
    # Launch the Gradio server when run as a script (the Spaces entrypoint).
    app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|