import gradio as gr
import torch # Optional, but good practice if using a PyTorch model
from transformers import AutoModelForCausalLM, AutoTokenizer
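# NOTE (added, not part of the original upload): on a Gradio-SDK Space, transformers and
# torch are not preinstalled, so they should be listed in the Space's requirements.txt.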
# --- 1. Load a simple, small pre-trained LLM and its tokenizer ---
# We'll use DistilGPT2 for speed and small size.
# You can replace this with another small model if you prefer.
model_name = "distilgpt2"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # If you have a GPU, uncomment the next line
    # model.to("cuda" if torch.cuda.is_available() else "cpu")
    model_loaded = True
    print(f"Successfully loaded model and tokenizer for: {model_name}")
except Exception as e:
    print(f"Error loading model: {e}")
    model_loaded = False

    # Define a dummy function if the model fails to load, so the Gradio interface still launches
    def generate_text_from_llm(prompt_text):
        return "Error: Model could not be loaded. Please check server logs."

    tokenizer = None  # To avoid errors later if tokenizer-specific functions are called
if model_loaded and tokenizer:
    # Ensure pad_token is set if it's not already (important for generate)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id
    # --- 2. Define the LLM inference function ---
    def generate_text_from_llm(prompt_text):
        """
        Generates a short text continuation using the loaded LLM.
        """
        if not prompt_text:
            return "Please enter a starting prompt!"
        try:
            # Encode the input prompt
            inputs = tokenizer.encode(prompt_text, return_tensors="pt", truncation=True, max_length=512)
            # If you have a GPU, uncomment the next line
            # inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")

            # Generate text
            # max_length is the total length of prompt + generated text
            # num_return_sequences=1 means we want one completion
            # no_repeat_ngram_size helps avoid repetitive text
            outputs = model.generate(
                inputs,
                max_length=len(inputs[0]) + 50,  # Generate up to 50 new tokens
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,  # Use EOS token for padding during generation
                no_repeat_ngram_size=2,  # Avoid repeating 2-grams
                early_stopping=True
            )
            # Decode the generated text
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Return only the newly generated part (optional, can be tricky)
            # For simplicity, we'll return the whole thing for now.
            # To return only new text: return generated_text[len(prompt_text):].strip()
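            # (Optional sketch, added, not part of the original demo.) Slicing by character
            # count can drift if the tokenizer normalizes whitespace; a more robust variant
            # slices the output token ids past the prompt instead:
            #   new_ids = outputs[0][inputs.shape[-1]:]
            #   return tokenizer.decode(new_ids, skip_special_tokens=True).strip()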
            return generated_text
        except Exception as e:
            print(f"Error during generation: {e}")
            return f"Error during text generation: {e}"
# --- 3. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_text_from_llm,
    inputs=[
        gr.Textbox(
            label="Enter your prompt",
            placeholder="Start typing here...",
            lines=5
        )
    ],
    outputs=[
        gr.Textbox(label="LLM Generated Text", lines=10)
    ],
    title="📝 Simple LLM Text Generator",
    description="Enter a prompt and a small LLM (DistilGPT2) will try to continue it. This is a basic demo for learning purposes.",
    examples=[
        ["Once upon a time, in a land far away,"],
        ["The best way to learn programming is"],
        ["Artificial intelligence is rapidly changing the world by"]
    ],
    theme=gr.themes.Soft()  # You can try other themes like gr.themes.Default()
)
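# (Optional, added suggestion, not part of the original demo.) If the Space gets
# concurrent users, Gradio's request queue can be enabled before launching:
#   demo.queue()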
# --- 4. Launch the app ---
# When the app is deployed to Hugging Face Spaces, this launch() call runs automatically.
# For local testing with a shareable link, use share=True.
if __name__ == "__main__":
    if model_loaded:
        demo.launch(debug=True, share=True)  # share=True creates a temporary public link
    else:
        print("Model failed to load. Gradio app will run with an error message function.")
        # Launch with the dummy function so the UI still appears
        demo_error = gr.Interface(
            fn=lambda x: "Error: Model could not be loaded.",
            inputs="textbox",
            outputs="textbox",
            title="LLM Demo - MODEL LOAD ERROR"
        )
        demo_error.launch(debug=True, share=True)