import time

import gradio as gr
from gradio import ChatMessage

sleep_time = 0.5


def simulate_thinking_chat(message, history):
    """Stream the 'thoughts' of a simulated thinking LLM, then its final answer."""
    start_time = time.time()

    # Start with a pending "thinking" message; the metadata title renders it as a
    # collapsible thought block in the chatbot.
    response = ChatMessage(
        content="",
        metadata={"title": "_Thinking_ step-by-step", "id": 0, "status": "pending"},
    )
    print("one")
    yield response
    print("two")

    thoughts = [
        "First, I need to understand the core aspects of the query...",
        "Now, considering the broader context and implications...",
        "Analyzing potential approaches to formulate a comprehensive answer...",
        "Finally, structuring the response for clarity and completeness...",
    ]

    # Stream each thought as it "arrives", accumulating them in the thinking message.
    accumulated_thoughts = ""
    for thought in thoughts:
        time.sleep(sleep_time)
        accumulated_thoughts += f"- {thought}\n\n"
        response.content = accumulated_thoughts.strip()
        print("three")
        yield response
        print("four")

    # Close the thought block and record how long the thinking phase took.
    response.metadata["status"] = "done"
    response.metadata["duration"] = time.time() - start_time
    print("five")
    yield response
    print("six")

    time.sleep(5.0)

    # Yield the finished thinking message together with the final answer.
    response = [
        response,
        ChatMessage(
            content="Based on my thoughts and analysis above, my response is: "
            "This dummy repro shows how thoughts of a thinking LLM can be "
            "progressively shown before providing its final answer."
        ),
    ]
    print("seven")
    yield response
    print("eight")

    # Alternative ending: yield only the final answer instead of the pair above.
    # response = ChatMessage(
    #     content="Based on my thoughts and analysis above, my response is: "
    #     "This dummy repro shows how thoughts of a thinking LLM can be "
    #     "progressively shown before providing its final answer."
    # )
    # yield response


demo1 = gr.ChatInterface(
    simulate_thinking_chat,
    title="Thinking LLM Chat Interface 🤔",
    type="messages",
)


def blocks_thinking_chat(message, history):
    # Adapter for the Blocks demo below (an addition, not part of the original
    # wiring, which passed simulate_thinking_chat straight to the Chatbot):
    # a gr.Chatbot output expects the full message list, so prepend the existing
    # history plus the user's message to every streamed update.
    history = (history or []) + [ChatMessage(role="user", content=message)]
    for update in simulate_thinking_chat(message, history):
        updates = update if isinstance(update, list) else [update]
        yield history + updates


with gr.Blocks() as demo:
    # ChatMessage objects require the "messages" format.
    chat = gr.Chatbot(type="messages")
    with gr.Row():
        text = gr.Textbox(scale=9)
        btn = gr.Button(scale=1)

    btn.click(blocks_thinking_chat, [text, chat], [chat]).then(
        lambda: gr.Textbox(value=""), [], text
    )
    text.submit(blocks_thinking_chat, [text, chat], [chat]).then(
        lambda: gr.Textbox(value=""), [], text
    )

if __name__ == "__main__":
    demo.launch()