import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "ajibawa-2023/Young-Children-Storyteller-Mistral-7B"

# 4-bit NF4 quantization config for low-memory environments
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Generate a child-friendly story from a short topic prompt
def generate_story(prompt, max_new_tokens=400, temperature=0.7, top_p=0.9):
    # Wrap the topic in an instruction-style prompt
    formatted_prompt = (
        f"### Instruction:\nCreate a story for young children about: {prompt}\n\n"
        "### Response:\n"
    )
    # Tokenize with an attention mask to avoid generation warnings
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # cap generated tokens so the prompt doesn't eat the budget
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # Mistral has no dedicated pad token
        repetition_penalty=1.1,
    )
    story = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip the echoed prompt; keep only the model's response
    return story.split("### Response:")[-1].strip()

# Gradio interface
gr.Interface(
    fn=generate_story,
    inputs=gr.Textbox(label="Enter a story idea for children"),
    outputs=gr.Textbox(label="📖 Generated Story"),
    title="Young Children Story Generator",
    description="Give a topic like 'A dragon who loves to cook' and get a complete child-friendly story.",
).launch()