# Source: Hugging Face Space file view (revision 8c29564, file size 10,950 bytes).
import gradio as gr
import spaces
from huggingface_hub import InferenceClient
import fitz # PyMuPDF for PDF processing
import docx
import os
# Initialize the Llama 4 model client, preferring the Cerebras provider.
# The model id is kept in one place so the primary and fallback clients
# cannot drift apart.
MODEL_ID = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

try:
    client = InferenceClient(
        MODEL_ID,
        provider="cerebras",
        token=os.getenv("HF_TOKEN"),
    )
except Exception as e:
    # Best-effort startup: report the problem and fall back to the default
    # provider rather than crashing at import time.
    print(f"Error initializing Cerebras client: {e}")
    client = InferenceClient(MODEL_ID)
# Prompt templates keyed by task type: six generation tasks plus Custom Q&A.
# Every template takes a {content} placeholder; "custom_qa" additionally takes
# {question}. The templates are filled with str.format().
PROMPTS = {
    "summaries": """Write a concise summary of the provided document. The summary should be 150–200 words, capturing the main ideas, key arguments, or findings, and central themes. Exclude minor details or examples unless critical to the core message. Use clear, neutral language, structuring the summary with an introductory sentence stating the document's purpose, followed by main points in a logical order. Conclude with a brief statement on the document's significance or broader implications. Ensure accuracy by directly referencing the document's content and avoid personal opinions.
Document content:
{content}""",
    "outlines": """Create a detailed outline of the provided document. The outline should feature a clear hierarchy with main sections, subsections, and bullet points summarizing key concepts, arguments, or findings under each. Use Roman numerals for main sections and letters or numbers for subsections. Include a brief introductory statement (50–100 words) describing the document's scope and purpose. Ensure the outline is comprehensive, logically organized, and captures all major points from the document without extraneous details.
Document content:
{content}""",
    "analysis": """Provide a critical analysis of the provided document, focusing on its main arguments, evidence, or methodology. The analysis should be 300–400 words, evaluating strengths and weaknesses, the author's assumptions, biases, or rhetorical strategies, and the document's relevance or impact in its field or broader context. Support your points with direct evidence from the document, such as quotes or data. Organize the analysis with an introduction, body paragraphs (strengths, weaknesses, implications), and a conclusion. Maintain an objective tone, avoiding excessive summarization, and ensure all claims are grounded in the document's content.
Document content:
{content}""",
    "study_guides": """Develop a comprehensive study guide for the provided document. The guide should include: (1) a brief overview (50–100 words) of the document's scope and purpose, (2) a list of 5–7 key themes or concepts with concise explanations (50–100 words each), (3) a glossary of 10–15 essential terms from the document with clear definitions, (4) 3–5 critical discussion questions to encourage deeper thinking, and (5) a checklist of key takeaways or study tips. Organize the guide with clear headings and bullet points, ensuring it is student-friendly and focused on retention and understanding.
Document content:
{content}""",
    "tables": """Create a comparative table based on the provided document, synthesizing key elements (e.g., theories, findings, arguments, or concepts) across relevant criteria (e.g., assumptions, applications, strengths, weaknesses). The table should compare 3–5 elements, using rows for each element and columns for each criterion. Include a brief introductory paragraph (50–100 words) explaining the table's purpose and scope. Ensure the table is concise, visually organized, and populated with precise information directly from the document.
Document content:
{content}""",
    "questions": """Generate a set of 10 high-quality questions based on the provided document. Include: (1) 3 factual questions to test recall of key details, (2) 3 conceptual questions to assess understanding of main ideas or arguments, (3) 2 analytical questions to encourage critical thinking about the document's implications or weaknesses, and (4) 2 open-ended questions to prompt discussion or creative reflection. Label each question by type (factual, conceptual, analytical, open-ended), and ensure questions are clear, specific, and aligned with the document's core themes.
Document content:
{content}""",
    "custom_qa": """Based on the provided document, answer the following specific question thoroughly and accurately. Provide detailed information from the document that directly addresses the question. If the document doesn't contain enough information to fully answer the question, state what information is available and note what's missing.
Question: {question}
Document content:
{content}""",
}
# Document processing functions
def extract_text_from_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF, opened with PyMuPDF (``fitz``).

    Returns:
        The text of all pages concatenated in page order.
    """
    # The context manager closes the document even when a page fails to
    # parse; the previous explicit close() was skipped on any exception.
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)
def extract_text_from_docx(file_path):
    """Extract the paragraph text of a DOCX file.

    Args:
        file_path: Path of the DOCX file, opened with ``docx.Document``.

    Returns:
        The text of each entry in ``Document.paragraphs``, each followed by a
        newline, concatenated in order.
    """
    doc = docx.Document(file_path)
    # join() builds the result in one pass instead of repeated string +=.
    return "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)
def extract_text_from_txt(file_path):
    """Read a UTF-8 text file and return its contents.

    Args:
        file_path: Path of the text file.

    Returns:
        The decoded file contents. A leading UTF-8 byte-order mark, if
        present, is not included.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
        OSError: If the file cannot be opened.
    """
    # "utf-8-sig" decodes plain UTF-8 identically but drops a leading BOM,
    # which would otherwise end up in the text as "\ufeff".
    with open(file_path, "r", encoding="utf-8-sig") as file:
        return file.read()
def process_document(file):
    """Extract text from an uploaded document based on its file extension.

    Args:
        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), or an
            object whose ``name`` attribute holds the path.

    Returns:
        The extracted text; ``""`` when ``file`` is ``None``; or a
        human-readable message when the extension is not .pdf/.docx/.txt or
        when extraction raises.
    """
    if file is None:
        return ""
    # Accept a bare path as well as an upload object exposing `.name`, so the
    # function works regardless of how the caller hands over the upload.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name
    # Lower-case the extension so ".PDF" and ".pdf" are treated alike.
    file_extension = os.path.splitext(file_path)[1].lower()
    try:
        if file_extension == '.pdf':
            return extract_text_from_pdf(file_path)
        elif file_extension == '.docx':
            return extract_text_from_docx(file_path)
        elif file_extension == '.txt':
            return extract_text_from_txt(file_path)
        else:
            return "Unsupported file format. Please upload PDF, DOCX, or TXT files."
    except Exception as e:
        # Failures are reported as text rather than raised, so the caller
        # always gets a string back.
        return f"Error processing file: {str(e)}"
# AI processing function with ZeroGPU - UPDATED for Custom Q&A
@spaces.GPU
def generate_content(text_input, file_input, task_type, custom_question=""):
    """Generate study material for a document with the configured chat model.

    Args:
        text_input: Pasted text; used only when no file is supplied.
        file_input: Uploaded file (or ``None``); takes precedence over
            ``text_input`` and is converted to text by ``process_document``.
        task_type: Key into ``PROMPTS``; unknown keys fall back to the
            "summaries" template.
        custom_question: Question to answer when ``task_type`` is
            "custom_qa"; ignored otherwise.

    Returns:
        The model's reply, or a human-readable message when input is missing,
        the file could not be processed, or the model call fails.
    """
    # Get text content
    if file_input is not None:
        content = process_document(file_input)
        # process_document reports failures as plain text. Show them to the
        # user instead of sending the error message to the model as if it
        # were the document.
        if content.startswith(("Unsupported file format.", "Error processing file:")):
            return content
    else:
        content = text_input
    # Treat whitespace-only input the same as no input.
    if not content or not content.strip():
        return "Please provide text input or upload a file."

    if task_type == "custom_qa":
        # Tolerate None as well as an empty/blank question.
        if not (custom_question or "").strip():
            return "Please enter a question for custom Q&A."
        prompt = PROMPTS["custom_qa"].format(question=custom_question, content=content)
    else:
        prompt_template = PROMPTS.get(task_type, PROMPTS["summaries"])
        prompt = prompt_template.format(content=content)

    try:
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=2048,
            temperature=0.7,
            top_p=0.9,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Shown in the output box so the user sees why nothing was generated.
        return f"Error generating content: {str(e)}"
# Function to show/hide custom question input
def update_question_visibility(task_type):
    """Return a Gradio update that shows its target only for Custom Q&A.

    Args:
        task_type: The currently selected task value.

    Returns:
        ``gr.update(visible=True)`` when ``task_type`` is "custom_qa",
        otherwise ``gr.update(visible=False)``.
    """
    return gr.update(visible=(task_type == "custom_qa"))
# Main Gradio interface: inputs and task selection on the left, output on the
# right, Custom Q&A help below.
# NOTE(review): the emoji in the labels and the exact layout nesting were
# reconstructed from a mis-decoded, de-indented copy of this file; only the
# Custom Q&A, help-heading and Questions glyphs are certain — confirm the
# rest against the deployed Space.
with gr.Blocks(title="Document Study Assistant", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 📚 Document Study Assistant")
    gr.Markdown("Upload documents or paste text to generate summaries, outlines, analysis, study guides, tables, questions, or ask custom questions using Llama 4.")

    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### Input")
            text_input = gr.Textbox(
                label="Paste text here",
                placeholder="Paste your text content here...",
                lines=10,
                max_lines=20
            )
            gr.Markdown("**OR**")
            file_input = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
                file_count="single"
            )

            # Task selection: (label shown to the user, value passed to
            # generate_content and used as the PROMPTS key).
            gr.Markdown("### Select Task")
            task_type = gr.Radio(
                choices=[
                    ("📝 Summaries", "summaries"),
                    ("📋 Outlines", "outlines"),
                    ("🔍 Analysis", "analysis"),
                    ("📖 Study Guides", "study_guides"),
                    ("📊 Tables", "tables"),
                    ("❓ Questions", "questions"),
                    ("💬 Custom Q&A", "custom_qa")
                ],
                value="summaries",
                label="Choose what to generate"
            )

            # Custom question input; hidden until "custom_qa" is selected.
            with gr.Row(visible=False) as question_row:
                custom_question = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask a specific question about the document...",
                    lines=3
                )

            generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

        with gr.Column(scale=1):
            # Output section
            gr.Markdown("### Output")
            output = gr.Textbox(
                label="Generated Content",
                lines=25,
                max_lines=50,
                show_copy_button=True
            )

    # Help text for Custom Q&A. The string body stays at column 0 so the
    # Markdown is not rendered as an indented code block.
    gr.Markdown("""
## 💡 Custom Q&A Examples
**Try asking questions like:**
- "What are the main arguments presented?"
- "How does the author define [specific term]?"
- "What evidence supports the conclusion?"
- "What are the limitations mentioned?"
- "What methodology was used?"
""")

    # Show/hide the custom question row whenever the selected task changes.
    task_type.change(
        fn=update_question_visibility,
        inputs=[task_type],
        outputs=[question_row]
    )

    # Run generation when the button is clicked.
    generate_btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, task_type, custom_question],
        outputs=output,
        show_progress=True
    )
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    app.launch()