import gradio as gr
from vllm import LLM, SamplingParams

# Model to serve; vLLM downloads the weights and tokenizer from Hugging Face
# and handles tokenization internally, so no separate AutoTokenizer is needed.
model_name = "facebook/opt-125m"

# Initialize vLLM for CPU inference (single device, no tensor parallelism)
vllm_model = LLM(model=model_name, tensor_parallel_size=1, device="cpu")
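# Note: device="cpu" assumes a vLLM build with the CPU backend enabled; on a
# CUDA machine the device argument can be dropped and vLLM will use the GPU.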

def generate_response(prompt, max_tokens, temperature, top_p):
    # Gradio may deliver the numeric inputs as strings or floats, so cast them
    # explicitly before building the sampling parameters.
    sampling_params = SamplingParams(
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
    )

    # Generate text with vLLM; the raw prompt string is passed directly.
    # generate() returns one RequestOutput per prompt, each carrying its
    # completions in .outputs.
    output = vllm_model.generate(prompt, sampling_params)

    # Return the text of the first (and only) completion
    return output[0].outputs[0].text
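# Example of a direct call, bypassing the UI:
#   generate_response("The capital of France is", 32, 0.8, 0.95)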

# Build the Gradio interface with typed, labeled inputs
prompt = gr.Textbox(label="Prompt")
max_tokens = gr.Number(value=64, precision=0, label="Max tokens")
temperature = gr.Slider(0.0, 2.0, value=0.8, label="Temperature")
top_p = gr.Slider(0.0, 1.0, value=0.95, label="Top-p")
demo = gr.Interface(fn=generate_response, inputs=[prompt, max_tokens, temperature, top_p], outputs="text")

# Launch the app
demo.launch()
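# demo.launch(share=True) would additionally expose a temporary public URL,
# which relies on Gradio's tunneling service being reachable.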