Update app.py
app.py
CHANGED
@@ -1,106 +1,33 @@
-import os
-os.system('pip install dashscope')
 import gradio as gr
-from
-import
-
-
-
-
-
-
-
-
-
-
-History = List[Tuple[str, str]]
-Messages = List[Dict[str, str]]
-
-def clear_session() -> History:
-    return '', []
-
-def modify_system_session(system: str) -> str:
-    if system is None or len(system) == 0:
-        system = default_system
-    return system, system, []
-
-def history_to_messages(history: History, system: str) -> Messages:
-    messages = [{'role': Role.SYSTEM, 'content': system}]
-    for h in history:
-        messages.append({'role': Role.USER, 'content': h[0]})
-        messages.append({'role': Role.ASSISTANT, 'content': h[1]})
-    return messages
-
-
-def messages_to_history(messages: Messages) -> Tuple[str, History]:
-    assert messages[0]['role'] == Role.SYSTEM
-    system = messages[0]['content']
-    history = []
-    for q, r in zip(messages[1::2], messages[2::2]):
-        history.append([q['content'], r['content']])
-    return system, history
-
-
-def model_chat(query: Optional[str], history: Optional[History], system: str
-               ) -> Tuple[str, str, History]:
-    if query is None:
-        query = ''
-    if history is None:
-        history = []
-    messages = history_to_messages(history, system)
-    messages.append({'role': Role.USER, 'content': query})
-    gen = Generation.call(
-        model='google/gemma-1.1-7b-it',
-        messages=messages,
-        result_format='message',
-        stream=True
-    )
-    for response in gen:
-        if response.status_code == HTTPStatus.OK:
-            role = response.output.choices[0].message.role
-            response = response.output.choices[0].message.content
-            system, history = messages_to_history(messages + [{'role': role, 'content': response}])
-            yield '', history, system
-        else:
-            raise ValueError('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
-                response.request_id, response.status_code,
-                response.code, response.message
-            ))
-
-
-with gr.Blocks() as demo:
-    gr.Markdown("""<center><font size=8>Qwen2-72B-instruct Chat👾</center>""")
-
-    with gr.Row():
-        with gr.Column(scale=3):
-            system_input = gr.Textbox(value=default_system, lines=1, label='System')
-        with gr.Column(scale=1):
-            modify_system = gr.Button("🛠️ Set system prompt and clear history", scale=2)
-        system_state = gr.Textbox(value=default_system, visible=False)
-    chatbot = gr.Chatbot(label='qwen2-72B-instruct')
-    textbox = gr.Textbox(lines=1, label='Input')
-
-    with gr.Row():
-        clear_history = gr.Button("🧹 Clear history")
-        sumbit = gr.Button("🚀 Send")
-
-    textbox.submit(model_chat,
-                   inputs=[textbox, chatbot, system_state],
-                   outputs=[textbox, chatbot, system_input],
-                   concurrency_limit = 40)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+
+# Load the model and tokenizer
+model_name = "google/gemma-1.1-7b-it"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# Function for handling messages
+def chat_with_model(input_text):
+    # Tokenize the input text
+    inputs = tokenizer(input_text, return_tensors="pt")
+
+    # Generate a response
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_length=512, pad_token_id=tokenizer.eos_token_id)
+
+    # Decode and return the response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=chat_with_model,
+    inputs="text",
+    outputs="text",
+    title="Gemma Chatbot",
+    description="A chatbot based on the google/gemma-1.1-7b-it model"
+)
+
+# Launch the interface
+interface.launch()
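
A note on the new app.py: google/gemma-1.1-7b-it is an instruction-tuned chat model, but the committed chat_with_model passes the raw textbox string straight to the tokenizer. Below is a minimal sketch of the same app that wraps the input in the tokenizer's chat template before generating. It is not part of the commit; the float16/device_map settings (which assume a GPU and the accelerate package) and the max_new_tokens value are illustrative choices.

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "google/gemma-1.1-7b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Illustrative assumption: half precision on an available GPU via accelerate.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)

def chat_with_model(input_text):
    # Wrap the raw user text in Gemma's chat format rather than tokenizing it directly.
    messages = [{"role": "user", "content": input_text}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(input_ids, max_new_tokens=512)

    # Decode only the newly generated tokens, skipping the echoed prompt.
    return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

gr.Interface(
    fn=chat_with_model,
    inputs="text",
    outputs="text",
    title="Gemma Chatbot",
).launch()

Unlike max_length=512 in the committed version, which counts the prompt and the reply together, max_new_tokens bounds only the generated continuation, so a long prompt cannot crowd out the answer.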