import gradio as gr
from vllm import LLM, SamplingParams

# Model to serve; vLLM loads the matching tokenizer internally,
# so no separate transformers AutoTokenizer is needed
model_name = "facebook/opt-125m"
# Initialize vLLM with CPU configuration
vllm_model = LLM(model=model_name, tensor_parallel_size=1, device="cpu")
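
# Note (assumption about the deployment target): device="cpu" requires a
# vLLM build with CPU backend support; on a GPU host the default
# device="auto" would typically be used instead.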
def generate_response(prompt, max_tokens, temperature, top_p):
    # Gradio inputs may arrive as strings or floats, so coerce types explicitly
    sampling_params = SamplingParams(
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
    )
    # Generate text using vLLM (input is the raw string `prompt`)
    outputs = vllm_model.generate(prompt, sampling_params)
    # generate() returns a list of RequestOutput objects; take the text of
    # the first completion of the first (and only) request
    generated_text = outputs[0].outputs[0].text
    return generated_text
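
# Aside: vLLM's generate() also accepts a list of prompts and batches them
# in a single call, which is the more efficient pattern for bulk requests, e.g.
#   outputs = vllm_model.generate(["prompt A", "prompt B"], sampling_params)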
prompt = gr.Textbox(label="Prompt")
max_tokens = gr.Number(value=64, label="Max tokens", precision=0)
temperature = gr.Number(value=0.8, label="Temperature")
top_p = gr.Number(value=0.95, label="Top-p")
demo = gr.Interface(
    generate_response,
    inputs=[prompt, max_tokens, temperature, top_p],
    outputs="text",
)
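
# Optional sanity check before launching the UI. A minimal sketch: the prompt
# and parameter values below are illustrative, not part of the original app.
# print(generate_response("Hello, my name is", 32, 0.8, 0.95))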
# Launch the app
demo.launch()