雷娃 committed on
Commit
f00ccef
·
1 Parent(s): 2ba773c

add API access to Ling service

Browse files
Files changed (3) hide show
  1. app.py +14 -28
  2. app_api.py +92 -0
  3. app_hf_model.py +106 -0
app.py CHANGED
@@ -4,45 +4,31 @@ from threading import Thread
4
  import gradio as gr
5
  import re
6
  import torch
 
7
 
8
- # load model and tokenizer
9
- model_name = "inclusionAI/Ling-lite-1.5"
10
- tokenizer = AutoTokenizer.from_pretrained(model_name)
11
- model = AutoModelForCausalLM.from_pretrained(
12
- model_name,
13
- torch_dtype="auto",
14
- device_map="auto",
15
- trust_remote_code=True
16
- ).eval()
17
 
18
  # define chat function
19
  def chat(user_input, max_new_tokens=2048):
20
  # chat history
21
- messages = [
22
  {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
23
  {"role": "user", "content": user_input}
24
  ]
25
- prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
26
-
27
- # encode the input prompt
28
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
29
 
30
- #create streamer
31
- streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
32
-
33
- def generate():
34
- model.generate(**inputs, max_new_tokens=max_new_tokens, streamer=streamer)
35
-
36
- thread = Thread(target=generate)
37
- thread.start()
38
 
39
- start_idx = len("SYSTEM") + len(messages[0]["content"]) + len("HUMAN") + len(user_input) + len("ASSISTANT")
40
- generated_text = ""
41
- for new_text in streamer:
42
- generated_text += new_text
43
- yield generated_text[start_idx:]
44
 
45
- thread.join()
46
 
47
  # Create a custom layout using Blocks
48
  with gr.Blocks(css="""
 
4
  import gradio as gr
5
  import re
6
  import torch
7
+ from openai import OpenAI
8
 
9
+ client = OpenAI(
10
+ api_key="sk-420ab66020704eabbe37501ec39b7a2b",
11
+ base_url="https://bailingchat.alipay.com",
12
+ )
 
 
 
 
 
13
 
14
  # define chat function
15
  def chat(user_input, max_new_tokens=2048):
16
  # chat history
17
+ messages_template = [
18
  {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
19
  {"role": "user", "content": user_input}
20
  ]
 
 
 
 
21
 
22
+ response = client.chat.completions.create(
23
+ model="Ling-lite-1.5-250604",
24
+ messages=messages_template,
25
+ max_tokens=max_new_tokens,
26
+ temperature=0.7,
27
+ top_p=1,
28
+ )
29
+ yield response.choices[0].message.content
30
 
 
 
 
 
 
31
 
 
32
 
33
  # Create a custom layout using Blocks
34
  with gr.Blocks(css="""
app_api.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_api.py
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
3
+ from threading import Thread
4
+ import gradio as gr
5
+ import re
6
+ import torch
7
+ from openai import OpenAI
8
+
9
# Build the OpenAI-compatible client for the hosted Ling endpoint.
#
# The credential is read from the environment instead of being committed to
# the repository: a key checked into source is exposed to everyone who can
# read the repo and must be treated as leaked (rotate it). LING_API_KEY is the
# variable name introduced by this change; configure it as a secret on the
# hosting platform.
import os

_ling_api_key = os.environ.get("LING_API_KEY")
if not _ling_api_key:
    raise RuntimeError(
        "LING_API_KEY is not set; configure the Ling service API key as an "
        "environment variable / deployment secret."
    )

client = OpenAI(
    api_key=_ling_api_key,
    base_url="https://bailingchat.alipay.com",
)
13
+
14
+ # define chat function
15
def chat(user_input, max_new_tokens=2048):
    """Ask the hosted Ling model a single question and yield its answer.

    The function is a generator because the UI consumes it as one; it yields
    exactly one string per call.

    Args:
        user_input: The user's question. ``None``, empty, or whitespace-only
            input yields an empty string without contacting the service.
        max_new_tokens: Upper bound on generated tokens, forwarded to the
            service as ``max_tokens``.

    Yields:
        The assistant's reply text, or ``""`` when there is nothing to show.
    """
    # Do not spend a remote call on a blank prompt.
    if not user_input or not user_input.strip():
        yield ""
        return

    # Single-turn conversation: fixed system prompt plus the user's message.
    messages_template = [
        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
        {"role": "user", "content": user_input},
    ]

    response = client.chat.completions.create(
        model="Ling-lite-1.5-250604",
        messages=messages_template,
        # UI sliders may hand back a float; the API expects an integer.
        max_tokens=int(max_new_tokens),
        temperature=0.7,
        top_p=1,
    )

    # The reply content may be absent (no choices / None content); never
    # hand None to the output component.
    choices = response.choices
    content = choices[0].message.content if choices else None
    yield content or ""
30
+
31
+
32
+
33
+ # Create a custom layout using Blocks
34
+ with gr.Blocks(css="""
35
+ #markdown-output {
36
+ height: 300px;
37
+ overflow-y: auto;
38
+ border: 1px solid #ddd;
39
+ padding: 10px;
40
+ }
41
+ """) as demo:
42
+ gr.Markdown(
43
+ "## Ling-lite-1.5 AI Assistant\n"
44
+ "Based on [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) "
45
+ )
46
+
47
+ with gr.Row():
48
+ max_tokens_slider = gr.Slider(minimum=128, maximum=2048, step=16, label="Generated length")
49
+
50
+ # output_box = gr.Textbox(lines=10, label="Response")
51
+ output_box = gr.Markdown(label="Response", elem_id="markdown-output")
52
+ input_box = gr.Textbox(lines=8, label="Input you question")
53
+
54
+ examples = gr.Examples(
55
+ examples=[
56
+ ["Introducing the basic concepts of large language models"],
57
+ ["How to solve long context dependencies in math problems?"]
58
+ ],
59
+ inputs=input_box
60
+ )
61
+
62
+ interface = gr.Interface(
63
+ fn=chat,
64
+ inputs=[input_box, max_tokens_slider],
65
+ outputs=output_box,
66
+ live=False # disable auto-triggering on input change
67
+ )
68
+
69
+ # launch Gradio Service
70
+ demo.queue()
71
+ demo.launch()
72
+
73
+ # Construct Gradio Interface
74
+ #interface = gr.Interface(
75
+ # fn=chat,
76
+ # inputs=[
77
+ # gr.Textbox(lines=8, label="输入你的问题"),
78
+ # gr.Slider(minimum=100, maximum=102400, step=50, label="生成长度")
79
+ # ],
80
+ # outputs=[
81
+ # gr.Textbox(lines=8, label="模型回复")
82
+ # ],
83
+ # title="Ling-lite-1.5 AI助手",
84
+ # description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) 的对话式文本生成演示。",
85
+ # examples=[
86
+ # ["介绍大型语言模型的基本概念"],
87
+ # ["如何解决数学问题中的长上下文依赖?"]
88
+ # ]
89
+ #)
90
+
91
+ # launch Gradio Service
92
+ #interface.launch()
app_hf_model.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_hf_model.py
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
3
+ from threading import Thread
4
+ import gradio as gr
5
+ import re
6
+ import torch
7
+
8
# Load the tokenizer and the causal-LM weights for the Ling checkpoint once,
# at import time, so every chat request reuses the same objects.
model_name = "inclusionAI/Ling-lite-1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository; keep it only for checkpoints whose source is trusted.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)
# Switch to inference mode; eval() is applied to the freshly loaded model just
# as in the chained form.
model = model.eval()
17
+
18
+ # define chat function
19
def chat(user_input, max_new_tokens=2048):
    """Stream the local Ling model's reply to a single user message.

    Generation runs in a background thread while this generator drains the
    streamer, so the UI can repaint as text arrives.

    Args:
        user_input: The user's question, placed after a fixed system prompt.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Yields:
        The reply accumulated so far (each item extends the previous one).
    """
    # Single-turn conversation: fixed system prompt plus the user's message.
    messages = [
        {"role": "system", "content": "You are Ling, an assistant created by inclusionAI"},
        {"role": "user", "content": user_input},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Encode the rendered prompt and move it to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True makes the streamer drop the echoed prompt itself. This
    # replaces the former hand-computed character offset
    # (len("SYSTEM") + ... + len("ASSISTANT")), which silently truncated or
    # leaked text whenever the decoded template differed from that guess.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = {
        **inputs,
        # UI sliders may hand back a float; generate() expects an integer.
        "max_new_tokens": int(max_new_tokens),
        "streamer": streamer,
    }
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

    thread.join()
46
+
47
+ # Create a custom layout using Blocks
48
+ with gr.Blocks(css="""
49
+ #markdown-output {
50
+ height: 300px;
51
+ overflow-y: auto;
52
+ border: 1px solid #ddd;
53
+ padding: 10px;
54
+ }
55
+ """) as demo:
56
+ gr.Markdown(
57
+ "## Ling-lite-1.5 AI Assistant\n"
58
+ "Based on [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) "
59
+ )
60
+
61
+ with gr.Row():
62
+ max_tokens_slider = gr.Slider(minimum=128, maximum=2048, step=16, label="Generated length")
63
+
64
+ # output_box = gr.Textbox(lines=10, label="Response")
65
+ output_box = gr.Markdown(label="Response", elem_id="markdown-output")
66
+ input_box = gr.Textbox(lines=8, label="Input you question")
67
+
68
+ examples = gr.Examples(
69
+ examples=[
70
+ ["Introducing the basic concepts of large language models"],
71
+ ["How to solve long context dependencies in math problems?"]
72
+ ],
73
+ inputs=input_box
74
+ )
75
+
76
+ interface = gr.Interface(
77
+ fn=chat,
78
+ inputs=[input_box, max_tokens_slider],
79
+ outputs=output_box,
80
+ live=False # disable auto-triggering on input change
81
+ )
82
+
83
+ # launch Gradio Service
84
+ demo.queue()
85
+ demo.launch()
86
+
87
+ # Construct Gradio Interface
88
+ #interface = gr.Interface(
89
+ # fn=chat,
90
+ # inputs=[
91
+ # gr.Textbox(lines=8, label="输入你的问题"),
92
+ # gr.Slider(minimum=100, maximum=102400, step=50, label="生成长度")
93
+ # ],
94
+ # outputs=[
95
+ # gr.Textbox(lines=8, label="模型回复")
96
+ # ],
97
+ # title="Ling-lite-1.5 AI助手",
98
+ # description="基于 [inclusionAI/Ling-lite-1.5](https://huggingface.co/inclusionAI/Ling-lite-1.5) 的对话式文本生成演示。",
99
+ # examples=[
100
+ # ["介绍大型语言模型的基本概念"],
101
+ # ["如何解决数学问题中的长上下文依赖?"]
102
+ # ]
103
+ #)
104
+
105
+ # launch Gradio Service
106
+ #interface.launch()