Spaces:

inclusionAI
/

Ling-lite-1.5

Running

App Files Files Community

雷娃 committed on Jun 24

Commit

f00ccef

1 Parent(s): 2ba773c

add API access to Ling service

Browse files

Files changed (3) hide show

app.py +14 -28
app_api.py +92 -0
app_hf_model.py +106 -0

app.py CHANGED Viewed

@@ -4,45 +4,31 @@ from threading import Thread
 import gradio as gr
 import re
 import torch
-# load model and tokenizer
-model_name = "inclusionAI/Ling-lite-1.5"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype="auto",
-    device_map="auto",
-    trust_remote_code=True
-).eval()
 # define chat function
 def chat(user_input, max_new_tokens=2048):
     # chat history
-    messages = [
         {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
         {"role": "user", "content": user_input}
     ]
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # encode the input prompt
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    #create streamer
-    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
-    def generate():
-        model.generate(**inputs, max_new_tokens=max_new_tokens, streamer=streamer)
-    thread = Thread(target=generate)
-    thread.start()
-    start_idx = len("SYSTEM") + len(messages[0]["content"]) + len("HUMAN") + len(user_input) + len("ASSISTANT")
-    generated_text = ""
-    for new_text in streamer:
-        generated_text += new_text
-        yield generated_text[start_idx:]
-    thread.join()
 # Create a custom layout using Blocks
 with gr.Blocks(css="""

 import gradio as gr
 import re
 import torch
+from openai import OpenAI
+client = OpenAI(
+    api_key="sk-420ab66020704eabbe37501ec39b7a2b",
+    base_url="https://bailingchat.alipay.com",
+)
 # define chat function
 def chat(user_input, max_new_tokens=2048):
     # chat history
+    messages_template = [
         {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
         {"role": "user", "content": user_input}
     ]
+    response = client.chat.completions.create(
+        model="Ling-lite-1.5-250604",
+        messages=messages_template,
+        max_tokens=max_new_tokens,
+        temperature=0.7,
+        top_p=1,
+    )
+    yield response.choices[0].message.content
 # Create a custom layout using Blocks
 with gr.Blocks(css="""

app_api.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# app_api.py - variant of app.py that calls the hosted Ling API
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread
+import gradio as gr
+import re
+import torch
+from openai import OpenAI
+client = OpenAI(
+    api_key="sk-420ab66020704eabbe37501ec39b7a2b",
+    base_url="https://bailingchat.alipay.com",
+)
+# define chat function
+def chat(user_input, max_new_tokens=2048):
+    # chat history
+    messages_template = [
+        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
+        {"role": "user", "content": user_input}
+    ]
+    response = client.chat.completions.create(
+        model="Ling-lite-1.5-250604",
+        messages=messages_template,
+        max_tokens=max_new_tokens,
+        temperature=0.7,
+        top_p=1,
+    )
+    yield response.choices[0].message.content
+# Build the demo page with gr.Blocks; the CSS below styles the element whose id is "markdown-output" (the response area)
+with gr.Blocks(css="""
+    #markdown-output {
+        height: 300px;
+        overflow-y: auto;
+        border: 1px solid #ddd;
+        padding: 10px;
+    }
+""") as demo:
+    gr.Markdown(
+        "## Ling-lite-1.5 AI Assistant\n"
+        "Based on [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) "
+    )
+    with gr.Row():  # row holding the generation-length slider
+        max_tokens_slider = gr.Slider(minimum=128, maximum=2048, step=16, label="Generated length")  # passed to chat() as max_new_tokens
+#    Earlier plain-text output variant, kept for reference: output_box = gr.Textbox(lines=10, label="Response")
+    output_box = gr.Markdown(label="Response", elem_id="markdown-output")  # elem_id matches the CSS rule above
+    input_box = gr.Textbox(lines=8, label="Input you question")  # NOTE(review): label probably meant "Input your question"
+    examples = gr.Examples(  # sample prompts wired to input_box
+        examples=[
+            ["Introducing the basic concepts of large language models"],
+            ["How to solve long context dependencies in math problems?"]
+        ],
+        inputs=input_box
+    )
+    interface = gr.Interface(  # connects chat() to the components created above
+        fn=chat,
+        inputs=[input_box, max_tokens_slider],
+        outputs=output_box,
+        live=False  # disable auto-triggering on input change
+    )
+# Enable request queuing, then start the Gradio server (runs at import time)
+demo.queue()
+demo.launch()
+# Construct Gradio Interface
+#interface = gr.Interface(
+#    fn=chat,
+#    inputs=[
+#        gr.Textbox(lines=8, label="输入你的问题"),
+#        gr.Slider(minimum=100, maximum=102400, step=50, label="生成长度")
+#    ],
+#    outputs=[
+#        gr.Textbox(lines=8, label="模型回复")
+#    ],
+#    title="Ling-lite-1.5 AI助手",
+#    description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5)  的对话式文本生成演示。",
+#    examples=[
+#        ["介绍大型语言模型的基本概念"],
+#        ["如何解决数学问题中的长上下文依赖？"]
+#    ]
+#)
+# launch Gradio Service
+#interface.launch()

app_hf_model.py ADDED Viewed

	@@ -0,0 +1,106 @@

+# app_hf_model.py - variant of app.py that loads the model locally with transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread
+import gradio as gr
+import re
+import torch
+# Load the tokenizer and model once at import time; chat() reuses both for every request
+model_name = "inclusionAI/Ling-lite-1.5"  # Hugging Face Hub model id
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",  # let transformers choose the dtype
+    device_map="auto",  # let the library place the weights on the available device(s)
+    trust_remote_code=True  # NOTE(review): executes code shipped in the model repo; confirm the repo is trusted
+).eval()  # put the model in evaluation (inference) mode
+# define chat function
+def chat(user_input, max_new_tokens=2048):
+    # chat history
+    messages = [
+        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
+        {"role": "user", "content": user_input}
+    ]
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # encode the input prompt
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    #create streamer
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    def generate():
+        model.generate(**inputs, max_new_tokens=max_new_tokens, streamer=streamer)
+    thread = Thread(target=generate)
+    thread.start()
+    start_idx = len("SYSTEM") + len(messages[0]["content"]) + len("HUMAN") + len(user_input) + len("ASSISTANT")
+    generated_text = ""
+    for new_text in streamer:
+        generated_text += new_text
+        yield generated_text[start_idx:]
+    thread.join()
+# Build the demo page with gr.Blocks; the CSS below styles the element whose id is "markdown-output" (the response area)
+with gr.Blocks(css="""
+    #markdown-output {
+        height: 300px;
+        overflow-y: auto;
+        border: 1px solid #ddd;
+        padding: 10px;
+    }
+""") as demo:
+    gr.Markdown(
+        "## Ling-lite-1.5 AI Assistant\n"
+        "Based on [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) "
+    )
+    with gr.Row():  # row holding the generation-length slider
+        max_tokens_slider = gr.Slider(minimum=128, maximum=2048, step=16, label="Generated length")  # passed to chat() as max_new_tokens
+#    Earlier plain-text output variant, kept for reference: output_box = gr.Textbox(lines=10, label="Response")
+    output_box = gr.Markdown(label="Response", elem_id="markdown-output")  # elem_id matches the CSS rule above
+    input_box = gr.Textbox(lines=8, label="Input you question")  # NOTE(review): label probably meant "Input your question"
+    examples = gr.Examples(  # sample prompts wired to input_box
+        examples=[
+            ["Introducing the basic concepts of large language models"],
+            ["How to solve long context dependencies in math problems?"]
+        ],
+        inputs=input_box
+    )
+    interface = gr.Interface(  # connects chat() to the components created above
+        fn=chat,
+        inputs=[input_box, max_tokens_slider],
+        outputs=output_box,
+        live=False  # disable auto-triggering on input change
+    )
+# Enable request queuing, then start the Gradio server (runs at import time)
+demo.queue()
+demo.launch()
+# Construct Gradio Interface
+#interface = gr.Interface(
+#    fn=chat,
+#    inputs=[
+#        gr.Textbox(lines=8, label="输入你的问题"),
+#        gr.Slider(minimum=100, maximum=102400, step=50, label="生成长度")
+#    ],
+#    outputs=[
+#        gr.Textbox(lines=8, label="模型回复")
+#    ],
+#    title="Ling-lite-1.5 AI助手",
+#    description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5)  的对话式文本生成演示。",
+#    examples=[
+#        ["介绍大型语言模型的基本概念"],
+#        ["如何解决数学问题中的长上下文依赖？"]
+#    ]
+#)
+# launch Gradio Service
+#interface.launch()