Upload app.py with huggingface_hub
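A commit with this message is typically produced programmatically with the huggingface_hub client rather than through the web editor. Below is a minimal sketch of such an upload; the repo_id is a placeholder (not the actual Space), and the call assumes an access token is available via huggingface-cli login or the HF_TOKEN environment variable.

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or HF_TOKEN
api.upload_file(
    path_or_fileobj="app.py",               # local file to upload
    path_in_repo="app.py",                  # destination path inside the Space repo
    repo_id="your-username/your-space",     # placeholder Space id (assumption)
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)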
app.py
ADDED
@@ -0,0 +1,101 @@
import gradio as gr
import torch  # Optional, but good practice if using a PyTorch model
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- 1. Load a simple, small pre-trained LLM and its tokenizer ---
# We'll use DistilGPT2 for speed and small size.
# You can replace this with another small model if you prefer.
model_name = "distilgpt2"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # If you have a GPU, uncomment the next line
    # model.to("cuda" if torch.cuda.is_available() else "cpu")
    model_loaded = True
    print(f"Successfully loaded model and tokenizer for: {model_name}")
except Exception as e:
    print(f"Error loading model: {e}")
    model_loaded = False
    # Define dummy functions if model fails to load, so Gradio interface still launches
    def generate_text_from_llm(prompt_text):
        return "Error: Model could not be loaded. Please check server logs."
    tokenizer = None  # To avoid errors later if tokenizer specific functions are called

if model_loaded and tokenizer:
    # Ensure pad_token is set if it's not already (important for generate)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

    # --- 2. Define the LLM inference function ---
    def generate_text_from_llm(prompt_text):
        """
        Generates a short text continuation using the loaded LLM.
        """
        if not prompt_text:
            return "Please enter a starting prompt!"

        try:
            # Encode the input prompt
            inputs = tokenizer.encode(prompt_text, return_tensors="pt", truncation=True, max_length=512)
            # If you have a GPU, uncomment the next line
            # inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")

            # Generate text
            # max_length is the total length of prompt + generated text
            # num_return_sequences=1 means we want one completion
            # no_repeat_ngram_size helps avoid repetitive text
            outputs = model.generate(
                inputs,
                max_length=len(inputs[0]) + 50,  # Generate up to 50 new tokens
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,  # Use EOS token for padding during generation
                no_repeat_ngram_size=2,  # Avoid repeating 2-grams
                early_stopping=True
            )

            # Decode the generated text
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Return only the newly generated part (optional, can be tricky)
            # For simplicity, we'll return the whole thing for now.
            # To return only new text: return generated_text[len(prompt_text):].strip()
            return generated_text

        except Exception as e:
            print(f"Error during generation: {e}")
            return f"Error during text generation: {e}"

# --- 3. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_text_from_llm,
    inputs=[
        gr.Textbox(
            label="Enter your prompt",
            placeholder="Start typing here...",
            lines=5
        )
    ],
    outputs=[
        gr.Textbox(label="LLM Generated Text", lines=10)
    ],
    title="📝 Simple LLM Text Generator",
    description="Enter a prompt and a small LLM (DistilGPT2) will try to continue it. This is a basic demo for learning purposes.",
    examples=[
        ["Once upon a time, in a land far away,"],
        ["The best way to learn programming is"],
        ["Artificial intelligence is rapidly changing the world by"]
    ],
    theme=gr.themes.Soft()  # You can try other themes like gr.themes.Default()
)

# --- 4. Launch the app ---
# When deploying to Hugging Face Spaces, they will run this launch() command.
# For local testing with a shareable link, use share=True.
if __name__ == "__main__":
    if model_loaded:
        demo.launch(debug=True, share=True)  # share=True creates a temporary public link
    else:
        print("Model failed to load. Gradio app will run with an error message function.")
        # Launch with the dummy function so the UI still appears
        demo_error = gr.Interface(fn=lambda x: "Error: Model could not be loaded.", inputs="textbox", outputs="textbox", title="LLM Demo - MODEL LOAD ERROR")
        demo_error.launch(debug=True, share=True)
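For the Space to build, the Python packages imported by app.py also need to be declared alongside it, usually in a requirements.txt committed to the same repo (the Gradio version itself is normally pinned in the Space's README metadata rather than here). A minimal, unpinned sketch inferred from the imports above:

# requirements.txt (sketch; package set inferred from app.py's imports)
torch
transformers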