Upload app.py with huggingface_hub
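A commit with this message is typically produced programmatically with the huggingface_hub client rather than through the web editor. Below is a minimal sketch of such an upload; the repo_id is a placeholder (not the actual Space), and the call assumes an access token is available via huggingface-cli login or the HF_TOKEN environment variable.

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or HF_TOKEN
api.upload_file(
    path_or_fileobj="app.py",               # local file to upload
    path_in_repo="app.py",                  # destination path inside the Space repo
    repo_id="your-username/your-space",     # placeholder Space id (assumption)
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)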
app.py
ADDED
@@ -0,0 +1,101 @@
import gradio as gr
import torch  # Optional, but good practice if using a PyTorch model
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- 1. Load a simple, small pre-trained LLM and its tokenizer ---
# We'll use DistilGPT2 for speed and small size.
# You can replace this with another small model if you prefer.
model_name = "distilgpt2"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # If you have a GPU, uncomment the next line
    # model.to("cuda" if torch.cuda.is_available() else "cpu")
    model_loaded = True
    print(f"Successfully loaded model and tokenizer for: {model_name}")
except Exception as e:
    print(f"Error loading model: {e}")
    model_loaded = False
    # Define dummy functions if model fails to load, so Gradio interface still launches
    def generate_text_from_llm(prompt_text):
        return "Error: Model could not be loaded. Please check server logs."
    tokenizer = None  # To avoid errors later if tokenizer specific functions are called

if model_loaded and tokenizer:
    # Ensure pad_token is set if it's not already (important for generate)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

    # --- 2. Define the LLM inference function ---
    def generate_text_from_llm(prompt_text):
        """
        Generates a short text continuation using the loaded LLM.
        """
        if not prompt_text:
            return "Please enter a starting prompt!"

        try:
            # Encode the input prompt
            inputs = tokenizer.encode(prompt_text, return_tensors="pt", truncation=True, max_length=512)
            # If you have a GPU, uncomment the next line
            # inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")

            # Generate text
            # max_length is the total length of prompt + generated text
            # num_return_sequences=1 means we want one completion
            # no_repeat_ngram_size helps avoid repetitive text
            outputs = model.generate(
                inputs,
                max_length=len(inputs[0]) + 50,  # Generate up to 50 new tokens
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,  # Use EOS token for padding during generation
                no_repeat_ngram_size=2,  # Avoid repeating 2-grams
                early_stopping=True
            )

            # Decode the generated text
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Return only the newly generated part (optional, can be tricky)
            # For simplicity, we'll return the whole thing for now.
            # To return only new text: return generated_text[len(prompt_text):].strip()
            return generated_text

        except Exception as e:
            print(f"Error during generation: {e}")
            return f"Error during text generation: {e}"

# --- 3. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_text_from_llm,
    inputs=[
        gr.Textbox(
            label="Enter your prompt",
            placeholder="Start typing here...",
            lines=5
        )
    ],
    outputs=[
        gr.Textbox(label="LLM Generated Text", lines=10)
    ],
    title="📝 Simple LLM Text Generator",
    description="Enter a prompt and a small LLM (DistilGPT2) will try to continue it. This is a basic demo for learning purposes.",
    examples=[
        ["Once upon a time, in a land far away,"],
        ["The best way to learn programming is"],
        ["Artificial intelligence is rapidly changing the world by"]
    ],
    theme=gr.themes.Soft()  # You can try other themes like gr.themes.Default()
)

# --- 4. Launch the app ---
# When deploying to Hugging Face Spaces, they will run this launch() command.
# For local testing with a shareable link, use share=True.
if __name__ == "__main__":
    if model_loaded:
        demo.launch(debug=True, share=True)  # share=True creates a temporary public link
    else:
        print("Model failed to load. Gradio app will run with an error message function.")
        # Launch with the dummy function so the UI still appears
        demo_error = gr.Interface(fn=lambda x: "Error: Model could not be loaded.", inputs="textbox", outputs="textbox", title="LLM Demo - MODEL LOAD ERROR")
        demo_error.launch(debug=True, share=True)
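For the Space to build, the Python packages imported by app.py also need to be declared alongside it, usually in a requirements.txt committed to the same repo (the Gradio version itself is normally pinned in the Space's README metadata rather than here). A minimal, unpinned sketch inferred from the imports above:

# requirements.txt (sketch; package set inferred from app.py's imports)
torch
transformers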