1simo committed
Commit 1b7f60b · verified · 1 Parent(s): d14c9c8

Upload app.py with huggingface_hub

Files changed (1): app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
+ import gradio as gr
+ import torch  # Optional, but good practice if using a PyTorch model
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # --- 1. Load a simple, small pre-trained LLM and its tokenizer ---
+ # We'll use DistilGPT2 for speed and small size.
+ # You can replace this with another small model if you prefer.
+ model_name = "distilgpt2"
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(model_name)
+     # If you have a GPU, uncomment the next line
+     # model.to("cuda" if torch.cuda.is_available() else "cpu")
+     model_loaded = True
+     print(f"Successfully loaded model and tokenizer for: {model_name}")
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     model_loaded = False
+     # Define a dummy function if the model fails to load, so the Gradio interface still launches
+     def generate_text_from_llm(prompt_text):
+         return "Error: Model could not be loaded. Please check server logs."
+     tokenizer = None  # To avoid errors later if tokenizer-specific functions are called
+
+ if model_loaded and tokenizer:
+     # Ensure pad_token is set if it's not already (important for generate)
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+         model.config.pad_token_id = model.config.eos_token_id
+
+     # --- 2. Define the LLM inference function ---
+     def generate_text_from_llm(prompt_text):
+         """
+         Generates a short text continuation using the loaded LLM.
+         """
+         if not prompt_text:
+             return "Please enter a starting prompt!"
+
+         try:
+             # Encode the input prompt
+             inputs = tokenizer.encode(prompt_text, return_tensors="pt", truncation=True, max_length=512)
+             # If you have a GPU, uncomment the next line
+             # inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")
+
+             # Generate text
+             # max_length is the total length of prompt + generated text
+             # num_return_sequences=1 means we want one completion
+             # no_repeat_ngram_size helps avoid repetitive text
+             outputs = model.generate(
+                 inputs,
+                 max_length=len(inputs[0]) + 50,  # Generate up to 50 new tokens
+                 num_return_sequences=1,
+                 pad_token_id=tokenizer.eos_token_id,  # Use EOS token for padding during generation
+                 no_repeat_ngram_size=2,  # Avoid repeating 2-grams
+                 early_stopping=True  # Only affects beam search; harmless with greedy decoding
+             )
+
+             # Decode the generated text
+             generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+             # Return only the newly generated part (optional, can be tricky)
+             # For simplicity, we'll return the whole thing for now.
+             # To return only new text: return generated_text[len(prompt_text):].strip()
+             return generated_text
+
+         except Exception as e:
+             print(f"Error during generation: {e}")
+             return f"Error during text generation: {e}"
+
+ # --- 3. Create the Gradio Interface ---
+ demo = gr.Interface(
+     fn=generate_text_from_llm,
+     inputs=[
+         gr.Textbox(
+             label="Enter your prompt",
+             placeholder="Start typing here...",
+             lines=5
+         )
+     ],
+     outputs=[
+         gr.Textbox(label="LLM Generated Text", lines=10)
+     ],
+     title="📝 Simple LLM Text Generator",
+     description="Enter a prompt and a small LLM (DistilGPT2) will try to continue it. This is a basic demo for learning purposes.",
+     examples=[
+         ["Once upon a time, in a land far away,"],
+         ["The best way to learn programming is"],
+         ["Artificial intelligence is rapidly changing the world by"]
+     ],
+     theme=gr.themes.Soft()  # You can try other themes, like gr.themes.Default()
+ )
+
+ # --- 4. Launch the app ---
+ # When deploying to Hugging Face Spaces, Spaces runs this launch() command.
+ # For local testing with a shareable link, use share=True.
+ if __name__ == "__main__":
+     if model_loaded:
+         demo.launch(debug=True, share=True)  # share=True creates a temporary public link
+     else:
+         print("Model failed to load. Gradio app will run with an error message function.")
+         # Launch with the dummy function so the UI still appears
+         demo_error = gr.Interface(fn=lambda x: "Error: Model could not be loaded.", inputs="textbox", outputs="textbox", title="LLM Demo - MODEL LOAD ERROR")
+         demo_error.launch(debug=True, share=True)
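
Once this app.py is running in a Space, the demo can also be queried programmatically. Below is a minimal sketch (not part of this commit) using the gradio_client package; the Space id is a placeholder and the sketch assumes the Space is public and that gradio_client is installed.

# Minimal sketch (assumption, not part of this commit): query the running
# Gradio app from Python using the gradio_client package.
from gradio_client import Client

# Placeholder Space id -- replace with the actual <user>/<space-name>.
client = Client("your-username/your-space-name")

# A gr.Interface exposes its function under the default "/predict" endpoint.
result = client.predict("Once upon a time, in a land far away,", api_name="/predict")
print(result)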