# Library Installation

In [1]:
!pip install datasets
!pip install transformers
!pip install accelerate
!pip install gradio
!pip install dashscope
!pip install modelscope_studio
!pip freeze > requirements.txt

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

# Version 1


In [None]:
# Install transformers from source - only needed for versions <= v4.34
# pip install git+https://github.com/huggingface/transformers.git
# pip install accelerate

import torch
from transformers import pipeline

# bfloat16 and automatic device placement are only requested when a CUDA GPU is
# present; on CPU the model is loaded in float32 with default placement. This is
# the same selection the later cells of this notebook use.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,
)

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
# add_generation_prompt=True ends the rendered prompt with the assistant tag so
# the model continues with the assistant's turn.
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
# The printed text contains the rendered prompt followed by the completion
# (see the sample transcript below).
print(outputs[0]["generated_text"])
# <|system|>
# You are a friendly chatbot who always responds in the style of a pirate.</s>
# <|user|>
# How many helicopters can a human eat in one sitting?</s>
# <|assistant|>
# ...


Device set to use cpu


KeyboardInterrupt: 

# Version 2

In [None]:
import time
import torch
from transformers import pipeline
# Request bfloat16 / automatic device placement only when a CUDA GPU is present;
# on CPU fall back to float32 with default placement, matching the later cells
# of this notebook.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,
)
# System turn that asks the model for a two-part (reasoning + response) reply.
system_message = {"role": "system", "content": ("You are a Premium Chatbot who thinks before speaking and uses 2 paragraphs: one for Reasoning and another for Response. When replying, format your answer using the following tags exactly: output your reasoning with '<|assistant reasoning|>' followed by your chain-of-thought explanation, and then output your final reply with '<|assistant response|>' followed by your answer. Also, please use lowercase for '<|system|>' and '<|user|>' tags.")}
# Running transcript; every chat turn is appended to this list.
conversation = [system_message]
def reformat_output(output_text: str) -> str:
    """Rewrite loosely tagged model output into the notebook's tag format.

    ``<System>`` and ``<User>`` become ``<|system|>`` and ``<|user|>``. When an
    ``<assistant>`` tag is present, the text after its first occurrence is the
    assistant block:

    * if the block contains both ``Reasoning:`` and ``Answer:``, it is split at
      the first ``Answer:`` into an ``<|assistant reasoning|>`` section and an
      ``<|assistant response|>`` section;
    * otherwise every ``<assistant>`` tag is replaced by
      ``<|assistant reasoning|>``.

    Args:
        output_text: Raw text produced by the model.

    Returns:
        The text with the tags normalised as described above; text without any
        of the recognised tags is returned unchanged.
    """
    reformatted = output_text.replace("<System>", "<|system|>").replace("<User>", "<|user|>")
    if "<assistant>" not in reformatted:
        return reformatted

    before, assistant_block = reformatted.split("<assistant>", 1)
    if "Reasoning:" in assistant_block and "Answer:" in assistant_block:
        # Split at the FIRST "Answer:" only, so an answer that itself contains
        # "Answer:" is kept whole instead of being cut off at the second one.
        reasoning_part, _, response_part = assistant_block.partition("Answer:")
        reasoning_part = reasoning_part.replace("Reasoning:", "").strip()
        response_part = response_part.strip()
        return f"{before}<|assistant reasoning|> {reasoning_part}\n<|assistant response|> {response_part}"

    return reformatted.replace("<assistant>", "<|assistant reasoning|>")
# Interactive console chat: read a line, generate a reply, print it with timing.
print("Type 'bye' to exit the chat.")
while True:
    user_input = input("User: ")
    if user_input.strip().lower() == "bye":
        print("Chatbot: Farewell, Sir!")
        break
    if not user_input.strip():
        # Nothing to answer; ask again instead of sending an empty turn to the model.
        continue
    conversation.append({"role": "user", "content": user_input})
    prompt = pipe.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    start_time = time.time()
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    response_time = time.time() - start_time
    generated_text = outputs[0]["generated_text"]
    # The pipeline output is the rendered prompt followed by the completion.
    # Only the completion belongs in the history; storing the full text would
    # re-embed the whole transcript inside every assistant turn.
    if generated_text.startswith(prompt):
        reply_text = generated_text[len(prompt):]
    else:
        reply_text = generated_text
    # The displayed transcript still shows the full tagged text.
    formatted_output = reformat_output(generated_text)
    print("Chatbot:")
    print(formatted_output)
    print(f"\nResponse Time: {response_time:.2f} seconds")
    print("\n" + "-"*50 + "\n")
    conversation.append({"role": "assistant", "content": reply_text.strip()})


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Device set to use cpu


Type 'bye' to exit the chat.
User: halle


KeyboardInterrupt: 

# Version 3

In [None]:
import time
import torch
import gradio as gr
from transformers import pipeline

# Request bfloat16 / automatic device placement only when a CUDA GPU is present;
# on CPU fall back to float32 with default placement, matching the later cells
# of this notebook.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,
)
# System turn that asks the model for a two-part (reasoning + response) reply.
system_message = {"role": "system", "content": ("You are a Premium Chatbot who thinks before speaking and uses 2 paragraphs: one for Reasoning and another for Response. When replying, format your answer using the following tags exactly: output your reasoning with '<|assistant reasoning|>' followed by your chain-of-thought explanation, and then output your final reply with '<|assistant response|>' followed by your answer. Also, please use lowercase for '<|system|>' and '<|user|>' tags.")}
# Initial transcript used to seed the Gradio session state.
conversation = [system_message]

def reformat_output(output_text: str) -> str:
    """Rewrite loosely tagged model output into the notebook's tag format.

    ``<System>`` and ``<User>`` become ``<|system|>`` and ``<|user|>``. When an
    ``<assistant>`` tag is present, the text after its first occurrence is the
    assistant block:

    * if the block contains both ``Reasoning:`` and ``Answer:``, it is split at
      the first ``Answer:`` into an ``<|assistant reasoning|>`` section and an
      ``<|assistant response|>`` section;
    * otherwise every ``<assistant>`` tag is replaced by
      ``<|assistant reasoning|>``.

    Args:
        output_text: Raw text produced by the model.

    Returns:
        The text with the tags normalised as described above; text without any
        of the recognised tags is returned unchanged.
    """
    reformatted = output_text.replace("<System>", "<|system|>").replace("<User>", "<|user|>")
    if "<assistant>" not in reformatted:
        return reformatted

    before, assistant_block = reformatted.split("<assistant>", 1)
    if "Reasoning:" in assistant_block and "Answer:" in assistant_block:
        # Split at the FIRST "Answer:" only, so an answer that itself contains
        # "Answer:" is kept whole instead of being cut off at the second one.
        reasoning_part, _, response_part = assistant_block.partition("Answer:")
        reasoning_part = reasoning_part.replace("Reasoning:", "").strip()
        response_part = response_part.strip()
        return f"{before}<|assistant reasoning|> {reasoning_part}\n<|assistant response|> {response_part}"

    return reformatted.replace("<assistant>", "<|assistant reasoning|>")

def chat(user_input, state):
    """Handle one chat turn for the Gradio UI.

    Args:
        user_input: Text typed by the user.
        state: List of ``{"role", "content"}`` messages for this session; it is
            extended in place with the user turn and the assistant reply.

    Returns:
        ``(display_text, state)``. ``display_text`` is the reformatted full
        transcript followed by the response time, or a farewell message when
        the user typed ``bye`` (in which case ``state`` is not modified).
    """
    if user_input.strip().lower() == "bye":
        return "Chatbot: Farewell, Sir!", state
    state.append({"role": "user", "content": user_input})
    prompt = pipe.tokenizer.apply_chat_template(state, tokenize=False, add_generation_prompt=True)
    start_time = time.time()
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    response_time = time.time() - start_time
    generated_text = outputs[0]["generated_text"]
    # The pipeline output is the rendered prompt followed by the completion.
    # Only the completion is stored as the assistant turn; storing the full
    # text would re-embed the whole transcript in every later prompt.
    if generated_text.startswith(prompt):
        reply_text = generated_text[len(prompt):]
    else:
        reply_text = generated_text
    state.append({"role": "assistant", "content": reply_text.strip()})
    # The text shown to the user still contains the full tagged transcript.
    formatted_output = reformat_output(generated_text)
    result = f"{formatted_output}\n\nResponse Time: {response_time:.2f} seconds"
    return result, state

# Minimal Gradio front end: one read-only output box, one input box and a
# Send button wired to `chat`.
with gr.Blocks() as demo:
    gr.Markdown("## LogicLink Chatbot")
    # Session state is seeded with `conversation` (the system message).
    chatbot_state = gr.State(value=conversation)
    chatbot_output = gr.Textbox(label="Chatbot Response", interactive=False)
    user_input_text = gr.Textbox(placeholder="Type your message here...", label="Your Message")
    send_btn = gr.Button("Send")
    # `chat` receives (message, state) and returns (display text, updated state).
    send_btn.click(chat, inputs=[user_input_text, chatbot_state], outputs=[chatbot_output, chatbot_state])

# Start the app; the captured cell output shows it being served on a public share URL.
demo.launch()
def generate_formatted_output(user_prompt):
    """Render a placeholder transcript for ``user_prompt`` in the tagged format.

    No model is called: the reasoning section is a fixed template around the
    prompt and the response section simply echoes the prompt.

    Args:
        user_prompt: The user's query (a string).

    Returns:
        A string with ``<|system|>``, ``<|user|>``, ``<|assistant reasoning|>``
        and ``<|assistant response|>`` sections, each tag on its own line
        followed by its text.
    """
    persona = (
        "You are a Premium Chatbot who thinks before speaking and always responds "
        "in the specified format."
    )
    # Placeholder reasoning text built around the query.
    reasoning_text = "Different ways to approach the Query : " + user_prompt + " [Insert chain-of-thought reasoning here.]"
    # The response is just the query echoed back; swap in real logic if needed.
    sections = (
        ("<|system|>", persona),
        ("<|user|>", user_prompt),
        ("<|assistant reasoning|>", reasoning_text),
        ("<|assistant response|>", user_prompt),
    )
    return "\n".join(f"{tag}\n{text}" for tag, text in sections)

# Example usage: format a sample query and print the tagged transcript.
user_input = "hello how are you"
output = generate_formatted_output(user_input)
print(output)


Device set to use cpu


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1d08e9fcb3ee401634.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


<|system|>
You are a Premium Chatbot who thinks before speaking and always responds in the specified format.
<|user|>
hello how are you
<|assistant reasoning|>
Different ways to approach the Query : hello how are you [Insert chain-of-thought reasoning here.]
<|assistant response|>
hello how are you


In [None]:
import time
import torch
import gradio as gr
from transformers import pipeline

# Initialize the text-generation pipeline
# Choose precision and placement once, depending on whether CUDA is available:
# bfloat16 with automatic device mapping on GPU, float32 with default placement otherwise.
_has_cuda = torch.cuda.is_available()
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16 if _has_cuda else torch.float32,
    device_map="auto" if _has_cuda else None,
)

def logiclink_chat(user_input, history):
    """Answer ``user_input`` in two passes: a reasoning pass and a response pass.

    Both passes render their text as a single user turn through the
    tokenizer's chat template and request only the newly generated text, so
    the chat bubbles contain the model's words rather than an echo of the prompt.

    Args:
        user_input: Text from the input box.
        history: Current chatbot history; ``None`` is treated as empty. The
            list is extended in place with three ``(label, text)`` rows.

    Returns:
        ``(history, "")`` - the updated history and an empty string that
        clears the input box.
    """
    history = history if history is not None else []
    if not user_input:
        return history, ""
    # Append user message
    history.append(("You", user_input))

    def as_chat_prompt(text):
        # Render `text` as one user turn followed by the assistant tag.
        return pipe.tokenizer.apply_chat_template(
            [{"role": "user", "content": text}],
            tokenize=False,
            add_generation_prompt=True,
        )

    reasoning_request = (
        "Reflect on all potential nuances, underlying concepts, and alternative perspectives "
        "before forming a final answer for the following query: " + user_input
    )

    # Shared sampling settings; return_full_text=False drops the prompt echo.
    generation_kwargs = dict(
        max_new_tokens=128,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )

    start = time.time()
    # Generate chain-of-thought reasoning
    reasoning = pipe(as_chat_prompt(reasoning_request), **generation_kwargs)[0]["generated_text"].strip()
    # Generate final answer
    response = pipe(as_chat_prompt(user_input), **generation_kwargs)[0]["generated_text"].strip()
    elapsed = time.time() - start

    # Append reasoning and response to history (elapsed covers both passes)
    history.append(("LogicLink – Reasoning", reasoning))
    history.append(("LogicLink - Response", f"{response}\n\n*({elapsed:.2f}s)*"))

    return history, ""

# Build Gradio interface: chat column (scale 3) on the left, short usage notes
# (scale 1) on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("## LogicLink Chat")
            chatbot = gr.Chatbot(label="Conversation", height=600)
            user_input = gr.Textbox(show_label=False, placeholder="Type your query here…")
            send = gr.Button("Send", variant="primary")
        with gr.Column(scale=1):
            gr.Markdown(
                "**How LogicLink Works**\n\n"
                "- You type a question.\n"
                "- LogicLink first shows its _chain-of-thought_.\n"
                "- Then it gives the concise answer below.\n"
                "- Conversation history is on the left."
            )

    # Both the button click and pressing Enter in the textbox run the same
    # handler, which returns the updated history and an empty input box.
    send.click(logiclink_chat, [user_input, chatbot], [chatbot, user_input])
    user_input.submit(logiclink_chat, [user_input, chatbot], [chatbot, user_input])

demo.launch()


# Version 4

In [None]:
import time
import torch
import gradio as gr
from transformers import pipeline

# Initialize the text-generation pipeline
# Choose precision and placement once, depending on whether CUDA is available:
# bfloat16 with automatic device mapping on GPU, float32 with default placement otherwise.
_has_cuda = torch.cuda.is_available()
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16 if _has_cuda else torch.float32,
    device_map="auto" if _has_cuda else None,
)

def logiclink_chat(user_input, history):
    """Answer ``user_input`` in two passes: a reasoning pass and a response pass.

    Both passes render their text as a single user turn through the
    tokenizer's chat template and request only the newly generated text, so
    the chat bubbles contain the model's words rather than an echo of the prompt.

    Args:
        user_input: Text from the input box.
        history: Current chatbot history; ``None`` is treated as empty. The
            list is extended in place with three ``(label, text)`` rows.

    Returns:
        ``(history, "")`` - the updated history and an empty string that
        clears the input box.
    """
    history = history if history is not None else []
    if not user_input:
        return history, ""
    # Append user message
    history.append(("You", user_input))

    def as_chat_prompt(text):
        # Render `text` as one user turn followed by the assistant tag.
        return pipe.tokenizer.apply_chat_template(
            [{"role": "user", "content": text}],
            tokenize=False,
            add_generation_prompt=True,
        )

    reasoning_request = (
        "Reflect on all potential nuances, underlying concepts, and alternative perspectives "
        "before forming a final answer for the following query: " + user_input
    )

    # Shared sampling settings; return_full_text=False drops the prompt echo.
    generation_kwargs = dict(
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )

    start = time.time()
    # Generate chain-of-thought reasoning
    reasoning = pipe(as_chat_prompt(reasoning_request), **generation_kwargs)[0]["generated_text"].strip()
    # Generate final answer
    response = pipe(as_chat_prompt(user_input), **generation_kwargs)[0]["generated_text"].strip()
    elapsed = time.time() - start

    # Append reasoning and response (elapsed covers both passes)
    history.append(("LogicLink – Reasoning", reasoning))
    history.append(("LogicLink – Response", f"{response}\n\n*({elapsed:.2f}s)*"))

    return history, ""

# Custom CSS for black background, red accents, blue borders.
# Passed to gr.Blocks(css=...) below.
# NOTE(review): the `.gr-button` / `.gr-textbox` / `.gr-chatbot` / `.gr-row`
# selectors presumably match class names emitted by the installed Gradio
# version - confirm in the rendered page, otherwise these rules have no effect.
custom_css = """
body {
    background-color: #000;
    color: #fff;
}
.gradio-container {
    border-radius: 8px;
    padding: 1rem;
}
.gr-button, .gr-button:hover {
    background-color: #e53935 !important;  /* red buttons */
    color: #fff !important;
}
.gr-textbox, .gr-chatbot {
    border: 2px solid #1e88e5 !important;  /* blue borders */
}
.gr-textbox textarea, .gr-chatbot {
    background-color: #111;
    color: #fff;
}
.gr-row {
    margin-bottom: 1rem;
}
"""

# Themed Gradio interface: chat column (scale 3) on the left, short usage notes
# (scale 1) on the right, styled with `custom_css`.
with gr.Blocks(css=custom_css) as demo:
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("## 🎨 LogicLink Chatbot")
            chatbot = gr.Chatbot(label="Conversation", height=600)
            user_input = gr.Textbox(show_label=False, placeholder="Type your query here…")
            send_btn = gr.Button("Send")
        with gr.Column(scale=1):
            gr.Markdown(
                "**How LogicLink Works**\n\n"
                "- You type a question.\n"
                "- LogicLink shows its _chain-of-thought_ first.\n"
                "- Then it gives the concise answer below.\n"
                "- Chat history stays on the left."
            )

    # Both the button click and pressing Enter in the textbox run the same
    # handler, which returns the updated history and an empty input box.
    send_btn.click(logiclink_chat, [user_input, chatbot], [chatbot, user_input])
    user_input.submit(logiclink_chat, [user_input, chatbot], [chatbot, user_input])

demo.launch()


Device set to use cpu
  chatbot = gr.Chatbot(label="Conversation", height=600)


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a8c1409802d141d0ae.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# Version 5

In [None]:
import uuid
import time
import json
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
from threading import Thread
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from config import DEFAULT_LOCALE, DEFAULT_THEME, get_text, user_config, bot_config, welcome_config
from ui_components.logo import Logo
from ui_components.settings_header import SettingsHeader

# Loading the tokenizer and model from Hugging Face's model hub
_CHECKPOINT = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT).to(device)

# Defining a custom stopping criteria class for the model's text generation
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the newest token is a stop token."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the last token of the first sequence is a stop id.

        Only ``input_ids[0]`` is inspected; ``scores`` and ``kwargs`` are unused.
        """
        # Token id 2 is the only stop id.
        # NOTE(review): presumably the tokenizer's end-of-sequence id - confirm.
        newest_token = input_ids[0][-1]
        return any(newest_token == stop_id for stop_id in (2,))

# Function to generate model predictions with streaming
def generate_response(user_input, history):
    """Generate the assistant's reply to ``user_input`` given the chat history.

    The prompt is rendered with the tokenizer's chat template from the
    ``role``/``content`` fields of ``history``. Any entry whose role is not
    ``"user"`` is treated as an assistant turn, and the trailing ``*(N.NNs)*``
    timing footer that the UI appends to replies is removed before the text is
    shown to the model again.

    Args:
        user_input: The pending user message.
        history: Iterable of dicts with at least ``"role"`` and ``"content"``;
            ``None`` is treated as empty. If its last entry is already the
            pending user message, that message is not added a second time.

    Returns:
        The generated reply text, accumulated from the streamer.
    """
    import re  # local import: not every cell that defines this function imports `re`

    chat_turns = []
    for item in history or []:
        role = "user" if item["role"] == "user" else "assistant"
        content = item["content"]
        if role == "assistant":
            # Drop the timing footer so the model never sees (or imitates) it.
            content = re.sub(r"\s*\*\(\d+\.\d+s\)\*\s*$", "", content)
        chat_turns.append({"role": role, "content": content})

    # The caller may already have appended the pending user message to history;
    # only add it here when it is not the last turn.
    pending_turn = {"role": "user", "content": user_input}
    if not chat_turns or chat_turns[-1] != pending_turn:
        chat_turns.append(pending_turn)

    prompt = tokenizer.apply_chat_template(chat_turns, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )
    # Generation runs in a worker thread while this thread drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        if '</s>' in partial_message:
            # Stop at a literal end marker and keep it out of the reply.
            partial_message = partial_message.split('</s>', 1)[0]
            break
    else:
        # The streamer was fully drained; wait for the worker to wind down
        # before returning.
        worker.join()
    return partial_message

class Gradio_Events:
    """Static event handlers for the chat UI.

    ``state_value`` is the dict held in ``gr.State``:

    * ``"conversation_contexts"``: maps conversation id -> ``{"history": [...]}``
    * ``"conversations"``: list of ``{"label", "key"}`` entries for the sidebar
    * ``"conversation_id"``: id of the active conversation (``""`` when none)

    History entries are dicts with ``"role"``, ``"content"``, ``"key"`` and
    ``"avatar"``.
    """

    # Class-level flag: True while `submit` is running a generation. It is a
    # class attribute, so it is shared by every caller of these handlers.
    _generating = False

    @staticmethod
    def logiclink_chat(user_input, history):
        """Generate a reply and append it to ``history`` as an assistant entry.

        Returns:
            ``(history, text)`` where ``text`` is the reply followed by the
            elapsed time, an error description if generation raised, or
            ``"No input provided"`` for empty input (history untouched).
        """
        if not user_input:
            return history, "No input provided"

        try:
            start = time.time()
            response = generate_response(user_input, history)
            elapsed = time.time() - start

            # Format output
            response_with_time = f"{response}\n\n*({elapsed:.2f}s)*"

            # Append as one output
            history.append({
                "role": "assistant",
                "content": response_with_time,
                "key": str(uuid.uuid4()),
                "avatar": None
            })

            return history, response_with_time
        except Exception as e:
            # Surface the failure in the chat instead of crashing the handler.
            error_msg = (
                f"Generation failed: {str(e)}. "
                f"Possible causes: insufficient memory, model incompatibility, or input issues."
            )
            history.append({
                "role": "assistant",
                "content": error_msg,
                "key": str(uuid.uuid4()),
                "avatar": None
            })
            return history, error_msg

    @staticmethod
    def add_message(input_value, state_value):
        """Record the user's message, creating a conversation when none is active.

        Returns:
            ``(input_update, chatbot_update, state_update)``: the cleared input,
            the active conversation's history and the state. Blank (or missing)
            input leaves the state unchanged.
        """
        # Initialize default outputs
        input_update = gr.update(value="")
        chatbot_update = gr.update(value=state_value["conversation_contexts"].get(state_value["conversation_id"], {"history": []})["history"])
        state_update = gr.update(value=state_value)

        # `input_value or ""` also covers a missing (None) value.
        if not (input_value or "").strip():
            return input_update, chatbot_update, state_update

        if not state_value["conversation_id"]:
            # First message of a new conversation: allocate an id and add a
            # sidebar entry labelled with the first 20 characters.
            random_id = str(uuid.uuid4())
            state_value["conversation_id"] = random_id
            state_value["conversation_contexts"][random_id] = {"history": []}
            state_value["conversations"].append({
                "label": input_value[:20] + ("..." if len(input_value) > 20 else ""),
                "key": random_id
            })

        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        history.append({
            "role": "user",
            "content": input_value,
            "key": str(uuid.uuid4()),
            "avatar": None
        })

        chatbot_update = gr.update(value=history)
        state_update = gr.update(value=state_value)

        return input_update, chatbot_update, state_update

    @staticmethod
    def submit(state_value):
        """Generate a reply to the last user message of the active conversation.

        Returns:
            ``(chatbot_update, state_update, output_update)``. The output text
            is the reply, or a notice when a generation is already running or
            there is no pending user message.
        """
        contexts = state_value["conversation_contexts"]
        conversation_id = state_value["conversation_id"]
        # Safe lookup: there may be no active conversation yet (e.g. a blank
        # first submit), in which case the history is simply empty.
        history = contexts.get(conversation_id, {"history": []})["history"]

        if Gradio_Events._generating:
            return (
                gr.update(value=history),
                gr.update(value=state_value),
                gr.update(value="Generation in progress, please wait...")
            )

        user_input = history[-1]["content"] if history and history[-1]["role"] == "user" else ""
        if not user_input:
            return (
                gr.update(value=history),
                gr.update(value=state_value),
                gr.update(value="No user input provided")
            )

        # A non-empty history implies the conversation exists in `contexts`.
        Gradio_Events._generating = True
        try:
            history, response = Gradio_Events.logiclink_chat(user_input, history)
            contexts[conversation_id]["history"] = history
        finally:
            # Always release the flag, otherwise one failure would block every
            # later submit with the "in progress" notice.
            Gradio_Events._generating = False

        return (
            gr.update(value=history),
            gr.update(value=state_value),
            gr.update(value=response)
        )

    @staticmethod
    def new_chat(state_value):
        """Deselect the active conversation and clear the chat and output views.

        Returns:
            ``(conversations_update, chatbot_update, state_update, output_update)``.
        """
        state_value["conversation_id"] = ""
        return (
            gr.update(items=state_value["conversations"]),
            gr.update(value=[]),
            gr.update(value=state_value),
            gr.update(value="")
        )

    @staticmethod
    def clear_history(state_value):
        """Empty the active conversation's history (no-op when none is active).

        Returns:
            ``(chatbot_update, state_update, output_update)``.
        """
        if state_value["conversation_id"]:
            state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = []
        return (
            gr.update(value=[]),
            gr.update(value=state_value),
            gr.update(value="")
        )

# Custom CSS with red/blue/black theme, passed to gr.Blocks(css=...) below.
# Colours are declared once as CSS variables in :root and reused by the rules.
# NOTE(review): the `.gr-*` selectors presumably match class names emitted by
# the installed Gradio version, and `.ms-gr-ant-input-textarea` a class emitted
# by modelscope_studio - confirm in the rendered page, otherwise the rules have
# no effect.
css = """
:root {
    --color-red: #ff4444;
    --color-blue: #1e88e5;
    --color-black: #000000;
    --color-dark-gray: #121212;
}

.gradio-container {
    background: var(--color-black) !important;
    color: white !important;
}

/* Input styling */
.gr-textbox textarea, .ms-gr-ant-input-textarea {
    background: var(--color-dark-gray) !important;
    border: 2px solid var(--color-blue) !important;
    color: white !important;
}

/* Output (chatbot) styling */
.gr-chatbot {
    background: var(--color-dark-gray) !important;
    border: 2px solid var(--color-red) !important;
}

/* Output textbox styling */
.gr-textbox.output-textbox {
    background: var(--color-dark-gray) !important;
    border: 2px solid var(--color-red) !important;
    color: white !important;
    margin-bottom: 10px;
}

/* User message bubbles */
.gr-chatbot .user {
    background: var(--color-blue) !important;
    border-color: var(--color-blue) !important;
}

/* Assistant message bubbles */
.gr-chatbot .bot {
    background: var(--color-dark-gray) !important;
    border: 1px solid var(--color-red) !important;
}

/* Buttons */
.gr-button {
    background: var(--color-blue) !important;
    border-color: var(--color-blue) !important;
}

/* Thinking tooltip */
.gr-chatbot .tool {
    background: var(--color-dark-gray) !important;
    border: 1px solid var(--color-red) !important;
}
"""

# Version 5 UI: a conversations sidebar on the left and the chat area (message
# list, latest-output textbox, input sender) on the right, built from
# modelscope_studio components inside a Gradio Blocks app.
with gr.Blocks(css=css, fill_width=True) as demo:
    # Per-session state consumed by the Gradio_Events handlers.
    state = gr.State({
        "conversation_contexts": {},
        "conversations": [],
        "conversation_id": "",
    })

    with ms.Application(), antdx.XProvider(
            theme=DEFAULT_THEME, locale=DEFAULT_LOCALE), ms.AutoLoading():
        with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
            # Left Column
            with antd.Col(md=dict(flex="0 0 260px", span=24, order=0),
                        span=0, order=1):
                with ms.Div(elem_classes="chatbot-conversations"):
                    with antd.Flex(vertical=True, gap="small",
                                  elem_style=dict(height="100%")):
                        Logo()

                        # New Chat Button
                        with antd.Button(
                            color="primary",
                            variant="filled",
                            block=True,
                            elem_classes="new-chat-btn"
                        ) as new_chat_btn:
                            ms.Text(get_text("New Chat", "新建对话"))
                            with ms.Slot("icon"):
                                antd.Icon("PlusOutlined")

                        # Conversations List
                        with antdx.Conversations(
                                elem_classes="chatbot-conversations-list"
                        ) as conversations:
                            with ms.Slot('menu.items'):
                                # NOTE(review): no event handler is attached to
                                # this menu item anywhere in this cell - confirm
                                # whether "Delete" is meant to do something.
                                with antd.Menu.Item(
                                        label="Delete",
                                        key="delete",
                                        danger=True
                                ) as conversation_delete_menu_item:
                                    with ms.Slot("icon"):
                                        antd.Icon("DeleteOutlined")

            # Right Column
            with antd.Col(flex=1, elem_style=dict(height="100%")):
                with antd.Flex(vertical=True, gap="small",
                               elem_classes="chatbot-chat"):
                    # Chat Display
                    chatbot = pro.Chatbot(
                        elem_classes="chatbot-chat-messages",
                        height=600,
                        welcome_config=welcome_config(),
                        user_config=user_config(),
                        bot_config=bot_config()
                    )

                    # Output Textbox: receives the latest reply or status text
                    # from the handlers below.
                    output_textbox = gr.Textbox(
                        label="LatestOutputTextbox",
                        lines=1,
                        elem_classes="output-textbox",
                        interactive=True
                    )

                    # Input Area
                    with antdx.Suggestion(items=[]):
                        with ms.Slot("children"):
                            # NOTE: the name `input` shadows Python's built-in
                            # input() for the rest of this cell.
                            with antdx.Sender(
                                placeholder="Type your message...",
                                elem_classes="chat-input"
                            ) as input:
                                with ms.Slot("prefix"):
                                    with antd.Flex(gap=4):
                                        with antd.Button(
                                            type="text",
                                            elem_classes="clear-btn"
                                        ) as clear_btn:
                                            with ms.Slot("icon"):
                                                antd.Icon("ClearOutlined")

    # Event Handlers
    # Submitting first records the user message (add_message), then generates
    # the reply (submit).
    input.submit(
        fn=Gradio_Events.add_message,
        inputs=[input, state],
        outputs=[input, chatbot, state]
    ).then(
        fn=Gradio_Events.submit,
        inputs=[state],
        outputs=[chatbot, state, output_textbox]
    )

    # "New Chat" deselects the active conversation and refreshes the sidebar items.
    new_chat_btn.click(
        fn=Gradio_Events.new_chat,
        inputs=[state],
        outputs=[conversations, chatbot, state, output_textbox]
    )

    # The clear button empties the active conversation's history.
    clear_btn.click(
        fn=Gradio_Events.clear_history,
        inputs=[state],
        outputs=[chatbot, state, output_textbox]
    )

# debug=True keeps this cell running so errors and logs stay visible (see the
# captured output below); share=True serves the app on a public URL.
demo.queue().launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://67101a38476e88661c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://298833b3c6d34bf2fc.gradio.live
Killing tunnel 127.0.0.1:7861 <> https://67101a38476e88661c.gradio.live




In [None]:
import uuid
import time
import json
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
from threading import Thread
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from config import DEFAULT_LOCALE, DEFAULT_THEME, get_text, user_config, bot_config, welcome_config
from ui_components.logo import Logo
from ui_components.settings_header import SettingsHeader
import re

# Loading the tokenizer and model from Hugging Face's model hub
_CHECKPOINT = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT).to(device)

# Defining a custom stopping criteria class for the model's text generation
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the newest token is a stop token."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the last token of the first sequence is a stop id.

        Only ``input_ids[0]`` is inspected; ``scores`` and ``kwargs`` are unused.
        """
        # Token id 2 is the only stop id.
        # NOTE(review): presumably the tokenizer's end-of-sequence id - confirm.
        newest_token = input_ids[0][-1]
        return any(newest_token == stop_id for stop_id in (2,))

# Function to generate model predictions with streaming
def generate_response(user_input, history):
    """Generate the assistant's reply to ``user_input`` given the earlier conversation.

    Args:
        user_input: Text of the user's newest message.
        history: List of message dicts carrying at least ``"role"`` and
            ``"content"``. It may already end with the newest user message
            (the UI stores it before asking for a reply); that trailing copy
            is ignored so the question reaches the model only once.

    Returns:
        The reply text accumulated from the token streamer.
    """
    # Drop the pending user turn if the caller already stored it in `history`.
    prior_turns = list(history)
    if (prior_turns
            and prior_turns[-1]["role"] == "user"
            and prior_turns[-1]["content"] == user_input):
        prior_turns.pop()

    chat = []
    for item in prior_turns:
        content = item["content"]
        if item["role"] == "assistant":
            # Stored replies end with a "*(1.23s)*" latency footer meant for display;
            # keep it out of the prompt so the model is not nudged into echoing it.
            content = re.sub(r'\*\(\d+\.\d+s\)\*', '', content).strip()
        chat.append({"role": item["role"], "content": content})
    chat.append({"role": "user", "content": user_input})

    # Let the tokenizer's chat template lay out roles and end-of-turn markers rather
    # than a hand-built string that tagged every non-user turn as the assistant.
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )
    # generate() blocks until done, so it runs in a worker while we drain the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        if '</s>' in partial_message:  # Defensive: stop if a literal end marker shows up
            break
    else:
        # Stream ended normally: reap the worker so threads do not pile up.
        worker.join()
    return partial_message

class Gradio_Events:
    """Static callbacks behind the chat UI's Gradio events.

    Handlers receive ``state_value``, a dict with three keys:
    ``"conversation_contexts"`` maps a conversation id to ``{"history": [...]}``,
    ``"conversations"`` lists ``{"label", "key"}`` sidebar entries, and
    ``"conversation_id"`` holds the active id (``""`` when there is none).
    """

    # Class-level flag shared by every caller; True while a reply is being generated.
    _generating = False

    @staticmethod
    def logiclink_chat(user_input, history):
        """Generate a reply, stamp it with the elapsed time and append it to ``history``.

        Returns:
            ``(history, text)`` where ``text`` is the appended assistant message
            (reply plus elapsed-time footer, or an error description). With an
            empty ``user_input`` nothing is appended and ``text`` is
            ``"No input provided"``.
        """
        if not user_input:
            return history, "No input provided"

        try:
            start = time.time()
            response = generate_response(user_input, history)
            elapsed = time.time() - start

            # Remove any elapsed-time footer already present in the reply ...
            cleaned_response = re.sub(r'\*\(\d+\.\d+s\)\*', '', response).strip()

            # ... then add exactly one
            response_with_time = f"{cleaned_response}\n\n*({elapsed:.2f}s)*"

            history.append({
                "role": "assistant",
                "content": response_with_time,
                "key": str(uuid.uuid4()),
                "avatar": None
            })

            return history, response_with_time
        except Exception as e:
            # Report the failure inside the chat rather than raising into Gradio
            error_msg = (
                f"Generation failed: {str(e)}. "
                f"Possible causes: insufficient memory, model incompatibility, or input issues."
            )
            history.append({
                "role": "assistant",
                "content": error_msg,
                "key": str(uuid.uuid4()),
                "avatar": None
            })
            return history, error_msg

    @staticmethod
    def add_message(input_value, state_value):
        """Store the user's message in the active conversation, creating one if needed.

        Returns:
            ``(input_update, chatbot_update, state_update)``: the input box is
            always cleared; a blank message leaves the state untouched.
        """
        input_update = gr.update(value="")

        # Blank (or missing) input: just re-render whatever is currently active
        if not (input_value or "").strip():
            current = state_value["conversation_contexts"].get(state_value["conversation_id"], {"history": []})
            return input_update, gr.update(value=current["history"]), gr.update(value=state_value)

        if not state_value["conversation_id"]:
            # First message without a conversation: open one labelled by its first 20 chars
            random_id = str(uuid.uuid4())
            state_value["conversation_id"] = random_id
            state_value["conversation_contexts"][random_id] = {"history": []}
            state_value["conversations"].append({
                "label": input_value[:20] + ("..." if len(input_value) > 20 else ""),
                "key": random_id
            })

        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        history.append({
            "role": "user",
            "content": input_value,
            "key": str(uuid.uuid4()),
            "avatar": None
        })

        return input_update, gr.update(value=history), gr.update(value=state_value)

    @staticmethod
    def submit(state_value):
        """Answer the newest user message of the active conversation.

        Returns:
            ``(chatbot_update, state_update, output_textbox_update)``. The last
            element carries the reply or a short status message when nothing
            was generated.
        """
        context = state_value["conversation_contexts"].get(state_value["conversation_id"])

        if Gradio_Events._generating:
            history = context["history"] if context else []
            return (
                gr.update(value=history),
                gr.update(value=state_value),
                gr.update(value="Generation in progress, please wait...")
            )

        # A blank first message creates no conversation; without this guard the
        # lookup below raised KeyError after the busy flag had been set, leaving
        # the flag stuck at True for every later request.
        if context is None:
            return (
                gr.update(value=[]),
                gr.update(value=state_value),
                gr.update(value="No active conversation")
            )

        Gradio_Events._generating = True
        try:
            history = context["history"]
            user_input = history[-1]["content"] if history and history[-1]["role"] == "user" else ""
            if not user_input:
                return (
                    gr.update(value=history),
                    gr.update(value=state_value),
                    gr.update(value="No user input provided")
                )

            history, response = Gradio_Events.logiclink_chat(user_input, history)
            context["history"] = history
            return (
                gr.update(value=history),
                gr.update(value=state_value),
                gr.update(value=response)
            )
        finally:
            # Always release the busy flag, whatever happened above
            Gradio_Events._generating = False

    @staticmethod
    def new_chat(state_value):
        """Deselect the active conversation so the next message starts a new one.

        Returns:
            Updates for ``(conversations, chatbot, state, output_textbox)``.
        """
        state_value["conversation_id"] = ""
        return (
            gr.update(items=state_value["conversations"]),
            gr.update(value=[]),
            gr.update(value=state_value),
            gr.update(value="")
        )

    @staticmethod
    def clear_history(state_value):
        """Empty the active conversation's history (no-op when none is active).

        Returns:
            Updates for ``(chatbot, state, output_textbox)``.
        """
        if state_value["conversation_id"]:
            state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = []
        return (
            gr.update(value=[]),
            gr.update(value=state_value),
            gr.update(value="")
        )

# Custom stylesheet passed to gr.Blocks(css=...): a red/blue/black theme.
# The palette is declared once as CSS custom properties on :root and reused below.
# `.output-textbox` matches the elem_classes given to the output gr.Textbox.
# NOTE(review): the `.gr-chatbot`, `.gr-textbox` and `.gr-button` selectors are assumed
# to match class names emitted by the installed Gradio version - confirm in the browser.
css = """
:root {
    --color-red: #ff4444;
    --color-blue: #1e88e5;
    --color-black: #000000;
    --color-dark-gray: #121212;
}

.gradio-container {
    background: var(--color-black) !important;
    color: white !important;
}

/* Input styling */
.gr-textbox textarea, .ms-gr-ant-input-textarea {
    background: var(--color-dark-gray) !important;
    border: 2px solid var(--color-blue) !important;
    color: white !important;
}

/* Output (chatbot) styling */
.gr-chatbot {
    background: var(--color-dark-gray) !important;
    border: 2px solid var(--color-red) !important;
}

/* Output textbox styling */
.gr-textbox.output-textbox {
    background: var(--color-dark-gray) !important;
    border: 2px solid var(--color-red) !important;
    color: white !important;
    margin-bottom: 10px;
}

/* User message bubbles */
.gr-chatbot .user {
    background: var(--color-blue) !important;
    border-color: var(--color-blue) !important;
}

/* Assistant message bubbles */
.gr-chatbot .bot {
    background: var(--color-dark-gray) !important;
    border: 1px solid var(--color-red) !important;
}

/* Buttons */
.gr-button {
    background: var(--color-blue) !important;
    border-color: var(--color-blue) !important;
}

/* Thinking tooltip */
.gr-chatbot .tool {
    background: var(--color-dark-gray) !important;
    border: 1px solid var(--color-red) !important;
}
"""

# Build the chat UI: a left column (logo, "New Chat" button, conversation list) and a
# right column (chat display, latest-output textbox, message input with a clear button).
with gr.Blocks(css=css, fill_width=True, title="LogicLinkV5") as demo:
    # Per-session state shared by all handlers:
    #   conversation_contexts: conversation id -> {"history": [...]}
    #   conversations:         sidebar entries ({"label", "key"})
    #   conversation_id:       id of the active conversation ("" when none)
    state = gr.State({
        "conversation_contexts": {},
        "conversations": [],
        "conversation_id": "",
    })

    with ms.Application(), antdx.XProvider(
            theme=DEFAULT_THEME, locale=DEFAULT_LOCALE), ms.AutoLoading():
        with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
            # Left Column
            with antd.Col(md=dict(flex="0 0 260px", span=24, order=0),
                        span=0, order=1):
                with ms.Div(elem_classes="chatbot-conversations"):
                    with antd.Flex(vertical=True, gap="small",
                                  elem_style=dict(height="100%")):
                        Logo()

                        # New Chat Button
                        with antd.Button(
                            color="primary",
                            variant="filled",
                            block=True,
                            elem_classes="new-chat-btn"
                        ) as new_chat_btn:
                            ms.Text(get_text("New Chat", "新建对话"))
                            with ms.Slot("icon"):
                                antd.Icon("PlusOutlined")

                        # Conversations List
                        # NOTE(review): the "Delete" menu item is declared here, but no event
                        # listener is attached to `conversations` or to the item in this cell.
                        with antdx.Conversations(
                                elem_classes="chatbot-conversations-list"
                        ) as conversations:
                            with ms.Slot('menu.items'):
                                with antd.Menu.Item(
                                        label="Delete",
                                        key="delete",
                                        danger=True
                                ) as conversation_delete_menu_item:
                                    with ms.Slot("icon"):
                                        antd.Icon("DeleteOutlined")

            # Right Column
            with antd.Col(flex=1, elem_style=dict(height="100%")):
                with antd.Flex(vertical=True, gap="small",
                               elem_classes="chatbot-chat"):
                    # Chat Display
                    chatbot = pro.Chatbot(
                        elem_classes="chatbot-chat-messages",
                        height=600,
                        welcome_config=welcome_config(),
                        user_config=user_config(),
                        bot_config=bot_config()
                    )

                    # Output Textbox (receives the latest reply or status message)
                    output_textbox = gr.Textbox(
                        label="LatestOutputTextbox",
                        lines=1,
                        elem_classes="output-textbox",
                        interactive=True
                    )

                    # Input Area
                    # NOTE: `input` shadows the Python builtin of the same name for the
                    # rest of this cell.
                    with antdx.Suggestion(items=[]):
                        with ms.Slot("children"):
                            with antdx.Sender(
                                placeholder="Type your message...",
                                elem_classes="chat-input"
                            ) as input:
                                with ms.Slot("prefix"):
                                    with antd.Flex(gap=4):
                                        with antd.Button(
                                            type="text",
                                            elem_classes="clear-btn"
                                        ) as clear_btn:
                                            with ms.Slot("icon"):
                                                antd.Icon("ClearOutlined")

    # Event Handlers
    # Sending a message runs two steps in order: add_message stores the user turn and
    # clears the input, then submit generates the reply and fills the output textbox.
    input.submit(
        fn=Gradio_Events.add_message,
        inputs=[input, state],
        outputs=[input, chatbot, state]
    ).then(
        fn=Gradio_Events.submit,
        inputs=[state],
        outputs=[chatbot, state, output_textbox]
    )

    # "New Chat" deselects the active conversation and empties the chat display
    new_chat_btn.click(
        fn=Gradio_Events.new_chat,
        inputs=[state],
        outputs=[conversations, chatbot, state, output_textbox]
    )

    # The clear button wipes the active conversation's history
    clear_btn.click(
        fn=Gradio_Events.clear_history,
        inputs=[state],
        outputs=[chatbot, state, output_textbox]
    )

# share=True requests a public gradio.live URL; with debug=True the cell keeps running
# until it is interrupted.
demo.queue().launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://860436bdac3f222998.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7861 <> https://860436bdac3f222998.gradio.live




# Test Block

In [2]:
import uuid
import time
import re
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
from threading import Thread
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from config import DEFAULT_LOCALE, DEFAULT_THEME, get_text, user_config, bot_config, welcome_config
from ui_components.logo import Logo
from ui_components.settings_header import SettingsHeader

# Choose the compute device first: CUDA when available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Download (or reuse the cached) TinyLlama chat tokenizer and weights from the
# Hugging Face hub, moving the model onto the chosen device straight away.
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(device)

# Defining a custom stopping criteria class for the model's text generation

class StopOnTokens(StoppingCriteria):
    """Ask generation to stop when the last token of the first sequence is a stop id."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop ids checked against the most recent token (only id 2 is listed).
        stop_ids = [2]
        last_token = input_ids[0][-1]
        return any(last_token == stop_id for stop_id in stop_ids)

# Function to generate model predictions with streaming

def generate_response(user_input, history):
    """Generate the assistant's reply to ``user_input`` given the earlier conversation.

    Args:
        user_input: Text of the user's newest message.
        history: List of message dicts carrying at least ``"role"`` and
            ``"content"`` (system, user and assistant turns). It may already
            end with the newest user message, because the UI stores it before
            asking for a reply; that trailing copy is ignored so the question
            reaches the model only once.

    Returns:
        The reply text accumulated from the token streamer.
    """
    # Drop the pending user turn if the caller already stored it in `history`.
    prior_turns = list(history)
    if (prior_turns
            and prior_turns[-1]["role"] == "user"
            and prior_turns[-1]["content"] == user_input):
        prior_turns.pop()

    chat = []
    for item in prior_turns:
        content = item["content"]
        if item["role"] == "assistant":
            # Stored replies end with a "*(1.23s)*" latency footer meant for display;
            # keep it out of the prompt so the model is not nudged into echoing it.
            content = re.sub(r'\*\(\d+\.\d+s\)\*', '', content).strip()
        # Keep the original role: the seeded system prompt must stay a system turn
        # instead of being presented to the model as something the assistant said.
        chat.append({"role": item["role"], "content": content})
    chat.append({"role": "user", "content": user_input})

    # Let the tokenizer's chat template lay out roles and end-of-turn markers.
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7,
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )
    # generate() blocks until done, so it runs in a worker while we drain the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    partial_message = ""
    for new_token in streamer:
        partial_message += new_token
        if '</s>' in partial_message:  # Defensive: stop if a literal end marker shows up
            break
    else:
        # Stream ended normally: reap the worker so threads do not pile up.
        worker.join()
    return partial_message

# System prompt used to seed the history of every new conversation.
# Adjacent string literals are joined with nothing in between, so each fragment
# must carry its own trailing space.
SYSTEM_PROMPT = (
    "I am LogicLink, Version 5—a state-of-the-art AI chatbot created by "
    "Kratu Gautam (A-27), "
    "I am here to assist you with any queries. How can I help you today?"
)

class Gradio_Events:
    """Static callbacks behind the chat UI's Gradio events.

    Handlers receive ``state_value``, a dict with three keys:
    ``"conversation_contexts"`` maps a conversation id to ``{"history": [...]}``,
    ``"conversations"`` lists ``{"label", "key"}`` sidebar entries, and
    ``"conversation_id"`` holds the active id (``""`` when there is none).
    Every conversation history starts with a system message built from
    ``SYSTEM_PROMPT``.
    """

    # Class-level flag shared by every caller; True while a reply is being generated.
    _generating = False

    @staticmethod
    def _seed_history():
        """Return a fresh history list holding only the system-prompt message."""
        return [{
            "role": "system",
            "content": SYSTEM_PROMPT,
            "key": str(uuid.uuid4()),
            "avatar": None
        }]

    @staticmethod
    def _chat_label(text):
        """Return the sidebar label for ``text``: its first 20 characters, "..." if cut."""
        return text[:20] + ("..." if len(text) > 20 else "")

    @staticmethod
    def new_chat(state_value):
        """Open a new conversation seeded with the system prompt and make it active.

        Existing conversations are left in the state untouched.

        Returns:
            Updates for ``(conversations, chatbot, state, input)``.
        """
        new_id = str(uuid.uuid4())
        state_value["conversation_id"] = new_id

        # Listed under a default name until the first user message renames it
        state_value["conversations"].append({
            "label": "New Chat",
            "key": new_id
        })
        state_value["conversation_contexts"][new_id] = {"history": Gradio_Events._seed_history()}

        return (
            gr.update(items=state_value["conversations"]),
            gr.update(value=state_value["conversation_contexts"][new_id]["history"]),
            gr.update(value=state_value),
            gr.update(value="")  # empties input
        )

    @staticmethod
    def add_message(input_value, state_value):
        """Store the user's message in the active conversation, creating one if needed.

        The conversation's label is set from the first user message.

        Returns:
            ``(input_update, chatbot_update, state_update)``: the input box is
            always cleared; a blank message leaves the state untouched.
        """
        input_update = gr.update(value="")

        # Blank (or missing) input: just re-render whatever is currently active
        if not (input_value or "").strip():
            conversation = state_value["conversation_contexts"].get(state_value["conversation_id"], {"history": []})
            return input_update, gr.update(value=conversation["history"]), gr.update(value=state_value)

        chat_name = Gradio_Events._chat_label(input_value)

        if not state_value["conversation_id"]:
            # No active conversation: create one named after this first message
            random_id = str(uuid.uuid4())
            state_value["conversation_id"] = random_id
            state_value["conversation_contexts"][random_id] = {"history": Gradio_Events._seed_history()}
            state_value["conversations"].append({
                "label": chat_name,
                "key": random_id
            })
        else:
            current_id = state_value["conversation_id"]
            history = state_value["conversation_contexts"][current_id]["history"]

            # First user message in an existing conversation: replace the default label
            if not any(msg["role"] == "user" for msg in history):
                for conv in state_value["conversations"]:
                    if conv["key"] == current_id:
                        conv["label"] = chat_name
                        break

        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        history.append({
            "role": "user",
            "content": input_value,
            "key": str(uuid.uuid4()),
            "avatar": None
        })

        return input_update, gr.update(value=history), gr.update(value=state_value)

    @staticmethod
    def submit(state_value):
        """Answer the newest user message of the active conversation.

        Returns:
            ``(chatbot_update, state_update, output_textbox_update)``. The last
            element carries the reply or a short status message when nothing
            was generated.
        """
        context = state_value["conversation_contexts"].get(state_value["conversation_id"])

        if Gradio_Events._generating:
            history = context["history"] if context else []
            return (
                gr.update(value=history),
                gr.update(value=state_value),
                gr.update(value="Generation in progress, please wait...")
            )

        # No id, or an id whose context is missing: report it instead of raising
        # KeyError while the busy flag is set.
        if not state_value["conversation_id"] or context is None:
            return (
                gr.update(value=[]),
                gr.update(value=state_value),
                gr.update(value="No active conversation")
            )

        Gradio_Events._generating = True
        try:
            history = context["history"]

            # The last message is expected to be the latest user input
            user_input = history[-1]["content"] if (history and history[-1]["role"] == "user") else ""
            if not user_input:
                return (
                    gr.update(value=history),
                    gr.update(value=state_value),
                    gr.update(value="No user input provided")
                )

            history, response = Gradio_Events.logiclink_chat(user_input, history)
            context["history"] = history
            return (
                gr.update(value=history),
                gr.update(value=state_value),
                gr.update(value=response)
            )
        finally:
            # Always release the busy flag; otherwise one failure blocks every later request
            Gradio_Events._generating = False

    @staticmethod
    def logiclink_chat(user_input, history):
        """Generate a reply, stamp it with the elapsed time and append it to ``history``.

        Returns:
            ``(history, text)`` where ``text`` is the appended assistant message
            (reply plus elapsed-time footer, or an error description). With an
            empty ``user_input`` nothing is appended and ``text`` is
            ``"No input provided"``.
        """
        if not user_input:
            return history, "No input provided"
        try:
            start = time.time()
            response = generate_response(user_input, history)
            elapsed = time.time() - start
            # Remove any elapsed-time footer already present, then add exactly one
            cleaned_response = re.sub(r'\*\(\d+\.\d+s\)\*', '', response).strip()
            response_with_time = f"{cleaned_response}\n\n*({elapsed:.2f}s)*"
            history.append({
                "role": "assistant",
                "content": response_with_time,
                "key": str(uuid.uuid4()),
                "avatar": None
            })
            return history, response_with_time
        except Exception as e:
            # Report the failure inside the chat rather than raising into Gradio
            error_msg = (
                f"Generation failed: {str(e)}. "
                "Possible causes: insufficient memory, model incompatibility, or input issues."
            )
            history.append({
                "role": "assistant",
                "content": error_msg,
                "key": str(uuid.uuid4()),
                "avatar": None
            })
            return history, error_msg

    @staticmethod
    def clear_history(state_value):
        """Clear the active conversation, keeping a leading system message if present.

        Returns:
            Updates for ``(chatbot, state, output_textbox)``.
        """
        if state_value["conversation_id"]:
            context = state_value["conversation_contexts"][state_value["conversation_id"]]
            current_history = context["history"]
            if len(current_history) > 0 and current_history[0]["role"] == "system":
                # Only messages after the system prompt are dropped
                context["history"] = [current_history[0]]
            else:
                context["history"] = []

            return (
                gr.update(value=context["history"]),
                gr.update(value=state_value),
                gr.update(value="")
            )
        return (
            gr.update(value=[]),
            gr.update(value=state_value),
            gr.update(value="")
        )

    @staticmethod
    def delete_conversation(state_value, conversation_key):
        """Remove a conversation from the sidebar list and drop its stored history.

        Returns:
            Updates for ``(conversations, chatbot, state)``. Deleting the active
            conversation clears the chat display and resets the active id.
        """
        new_conversations = [conv for conv in state_value["conversations"] if conv["key"] != conversation_key]
        state_value["conversations"] = new_conversations

        # pop() with a default tolerates a key that has no stored context
        state_value["conversation_contexts"].pop(conversation_key, None)

        if state_value["conversation_id"] == conversation_key:
            state_value["conversation_id"] = ""
            return gr.update(items=new_conversations), gr.update(value=[]), gr.update(value=state_value)

        # Another conversation was deleted: keep showing the active one
        return (
            gr.update(items=new_conversations),
            gr.update(value=state_value["conversation_contexts"].get(
                state_value["conversation_id"], {"history": []}
            )["history"]),
            gr.update(value=state_value)
        )

# Custom stylesheet passed to gr.Blocks(css=...): a red/blue/black theme.
# The palette is declared once as CSS custom properties on :root and reused below.
# `.output-textbox` matches the elem_classes given to the output gr.Textbox.
# NOTE(review): the `.gr-chatbot`, `.gr-textbox` and `.gr-button` selectors are assumed
# to match class names emitted by the installed Gradio version - confirm in the browser.

css = """
:root {
--color-red: #ff4444;
--color-blue: #1e88e5;
--color-black: #000000;
--color-dark-gray: #121212;
}
.gradio-container { background: var(--color-black) !important; color: white !important; }
.gr-textbox textarea, .ms-gr-ant-input-textarea { background: var(--color-dark-gray) !important; border: 2px solid var(--color-blue) !important; color: white !important; }
.gr-chatbot { background: var(--color-dark-gray) !important; border: 2px solid var(--color-red) !important; }
.gr-textbox.output-textbox { background: var(--color-dark-gray) !important; border: 2px solid var(--color-red) !important; color: white !important; margin-bottom: 10px; }
.gr-chatbot .user { background: var(--color-blue) !important; border-color: var(--color-blue) !important; }
.gr-chatbot .bot { background: var(--color-dark-gray) !important; border: 1px solid var(--color-red) !important; }
.gr-button { background: var(--color-blue) !important; border-color: var(--color-blue) !important; }
.gr-chatbot .tool { background: var(--color-dark-gray) !important; border: 1px solid var(--color-red) !important; }
"""

# Build the chat UI: a left column (logo, "New Chat" button, conversation list with a
# "Delete" context menu) and a right column (chat display, latest-output textbox,
# message input with a clear button).
with gr.Blocks(css=css, fill_width=True, title="LogicLinkV5") as demo:
    # Per-session state shared by all handlers:
    #   conversation_contexts: conversation id -> {"history": [...]}
    #   conversations:         sidebar entries ({"label", "key"})
    #   conversation_id:       id of the active conversation ("" when none)
    state = gr.State({
        "conversation_contexts": {},
        "conversations": [],
        "conversation_id": "",
    })
    with ms.Application(), antdx.XProvider(theme=DEFAULT_THEME, locale=DEFAULT_LOCALE), ms.AutoLoading():
        with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
            # Left Column
            with antd.Col(md=dict(flex="0 0 260px", span=24, order=0), span=0, order=1):
                with ms.Div(elem_classes="chatbot-conversations"):
                    with antd.Flex(vertical=True, gap="small", elem_style=dict(height="100%")):
                        Logo()
                        with antd.Button(color="primary", variant="filled", block=True, elem_classes="new-chat-btn") as new_chat_btn:
                            # Second argument is the Chinese label; it had been garbled to ","
                            ms.Text(get_text("New Chat", "新建对话"))
                            with ms.Slot("icon"):
                                antd.Icon("PlusOutlined")
                        with antdx.Conversations(elem_classes="chatbot-conversations-list") as conversations:
                            with ms.Slot('menu.items'):
                                with antd.Menu.Item(label="Delete", key="delete", danger=True) as conversation_delete_menu_item:
                                    with ms.Slot("icon"):
                                        antd.Icon("DeleteOutlined")
            # Right Column
            with antd.Col(flex=1, elem_style=dict(height="100%")):
                with antd.Flex(vertical=True, gap="small", elem_classes="chatbot-chat"):
                    chatbot = pro.Chatbot(elem_classes="chatbot-chat-messages", height=600,
                                         welcome_config=welcome_config(), user_config=user_config(),
                                         bot_config=bot_config())
                    output_textbox = gr.Textbox(label="LatestOutputTextbox", lines=1,
                                              elem_classes="output-textbox", interactive=True)
                    # NOTE: `input` shadows the Python builtin of the same name for the
                    # rest of this cell.
                    with antdx.Suggestion(items=[]):
                        with ms.Slot("children"):
                            with antdx.Sender(placeholder="Type your message...", elem_classes="chat-input") as input:
                                with ms.Slot("prefix"):
                                    with antd.Flex(gap=4):
                                        with antd.Button(type="text", elem_classes="clear-btn") as clear_btn:
                                            with ms.Slot("icon"):
                                                antd.Icon("ClearOutlined")

    def _on_conversation_menu(state_value, e: gr.EventData):
        """Handle a click in a conversation's context menu.

        Gradio passes the event object only to a parameter annotated with
        ``gr.EventData``; a lambda cannot carry that annotation, so this is a
        named function.

        Returns:
            Updates for ``(conversations, chatbot, state)``; all three are
            skipped when the payload is unusable or the action is not "delete".
        """
        no_change = (gr.skip(), gr.skip(), gr.skip())
        event_data = getattr(e, "_data", None) or {}
        payload = event_data.get("payload") or []
        # Expected layout (as read by the original handler - confirm against the
        # modelscope_studio docs): payload[0] is the conversation, payload[1] the menu item.
        if len(payload) < 2:
            return no_change
        conversation, menu_item = payload[0], payload[1]
        if not isinstance(conversation, dict) or not isinstance(menu_item, dict):
            return no_change
        if "key" not in conversation or "key" not in menu_item:
            return no_change
        if menu_item["key"] == "delete":
            return Gradio_Events.delete_conversation(state_value, conversation["key"])
        return no_change

    # Event Handlers
    # Sending a message runs two steps in order: add_message stores the user turn and
    # clears the input, then submit generates the reply and fills the output textbox.
    input.submit(fn=Gradio_Events.add_message, inputs=[input, state],
                outputs=[input, chatbot, state]).then(
        fn=Gradio_Events.submit, inputs=[state],
        outputs=[chatbot, state, output_textbox]
    )
    new_chat_btn.click(fn=Gradio_Events.new_chat,
                     inputs=[state],
                     outputs=[conversations, chatbot, state, input],
                     queue=False)
    clear_btn.click(fn=Gradio_Events.clear_history, inputs=[state],
                   outputs=[chatbot, state, output_textbox])
    conversations.menu_click(
        fn=_on_conversation_menu,
        inputs=[state],
        outputs=[conversations, chatbot, state],
        queue=False
    )

# share=True requests a public gradio.live URL; with debug=True the cell keeps running
# until it is interrupted.
demo.queue().launch(share=True, debug=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://4177a6ef073f87f7a7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://4177a6ef073f87f7a7.gradio.live




In [14]:
# Zip the cached TinyLlama checkpoint directory from the Hugging Face hub cache,
# excluding anything under a `.no_exist/` directory.
# NOTE(review): the absolute /root/.cache path is specific to this Colab runtime.
!zip -r model_files.zip \
  /root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0 \
  -x '*.no_exist/*'

# Colab-only: hand the archive to the browser as a file download.
from google.colab import files
files.download('model_files.zip')

  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/ (stored 0%)
  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/ (stored 0%)
  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/ (stored 0%)
  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/tokenizer.json (deflated 74%)
  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/special_tokens_map.json (deflated 79%)
  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/tokenizer_config.json (deflated 68%)
  adding: root/.cache/huggingface/hub/models--TinyLlama--TinyLlama-1.1B-Chat-v1.0/snapshots/fe8a4ea1ffedaf415f4da2f062534de366a451e6/config.json (def

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>