import gradio as gr
from utils.model_configuration_utils import select_best_model, ensure_model
from services.llm import build_llm
from utils.voice_input_utils import update_live_transcription, format_response_for_user
from services.embeddings import configure_embeddings
from services.indexing import create_symptom_index
import logging
import sys
import json

logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

# ========== Model setup ==========
MODEL_NAME, REPO_ID = select_best_model()
model_path = ensure_model()
print(f"Using model: {MODEL_NAME} from {REPO_ID}", flush=True)
print(f"Model path: {model_path}", flush=True)

# ========== LLM initialization ==========
print("\n<<< before build_llm", flush=True)
llm = build_llm(model_path)
print(">>> after build_llm", flush=True)

# ========== Embeddings & index setup ==========
print("\n<<< before configure_embeddings", flush=True)
configure_embeddings()
print(">>> after configure_embeddings", flush=True)
print("Embeddings configured and ready", flush=True)

print("\n<<< before create_symptom_index", flush=True)
symptom_index = create_symptom_index()
print(">>> after create_symptom_index", flush=True)
print("Symptom index built successfully. Ready for queries.", flush=True)

# ========== Prompt template ==========
# Adjacent string literals concatenate into a single string; commas here would
# silently turn this into a tuple.
SYSTEM_PROMPT = (
    "You are a medical assistant helping a user find the most relevant ICD-10 code based on their symptoms.\n"
    "At each turn, determine the top three most relevant ICD-10 codes based on input from the user.\n"
    "Assign a confidence score from 1 to 100 to each code you decide is relevant.\n"
    "Ask the user a question whose answer will raise or lower your confidence in each code.\n"
    "Replace low-confidence codes with new ones as you learn more.\n"
    "Your goal is to find the most relevant codes with high confidence.\n"
    "When you have high confidence in a code, provide it to the user.\n"
    "Maintain a conversational tone and explain your reasoning step by step.\n"
    "If you need more information, ask the user a clear follow-up question.\n"
    "End your response with a summary of the top codes and their confidence scores.\n"
)


def truncate_prompt_llama(prompt, max_tokens=2048):
    """Truncate `prompt` to at most `max_tokens` tokens using the model's own tokenizer.

    Assumes the object returned by build_llm() exposes llama_cpp's
    tokenize/detokenize API.
    """
    tokens = llm.tokenize(prompt.encode("utf-8"))
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
        prompt = llm.detokenize(tokens).decode("utf-8", errors="ignore")
    return prompt


# ========== Generator handler ==========
def on_submit(symptoms_text, history):
    log = []
    print("on_submit called", flush=True)

    # Acknowledge receipt immediately so the UI shows progress
    msg = "🔍 Received input"
    log.append(msg)
    print(msg, flush=True)
    history = history + [{"role": "assistant", "content": "Processing your request..."}]
    yield history, None, "\n".join(log)

    # Validate
    if not symptoms_text.strip():
        msg = "❌ No symptoms provided"
        log.append(msg)
        print(msg, flush=True)
        result = {"error": "No input provided", "diagnoses": [], "confidences": [], "follow_up": []}
        yield history, result, "\n".join(log)
        return

    # Clean input
    cleaned = symptoms_text.strip()
    msg = f"🔄 Cleaned text: {cleaned}"
    log.append(msg)
    print(msg, flush=True)
    yield history, None, "\n".join(log)

    # Semantic query
    msg = "🔍 Running semantic query"
    log.append(msg)
    print(msg, flush=True)
    yield history, None, "\n".join(log)
    qe = symptom_index.as_query_engine(retriever_kwargs={"similarity_top_k": 5})
    hits = qe.query(cleaned)
    msg = "🔍 Retrieved context entries"
    log.append(msg)
    print(msg, flush=True)
    history = history + [{"role": "assistant", "content": msg}]
    yield history, None, "\n".join(log)

    # Build prompt with minimal context
    context_list = []
    for node in getattr(hits, "source_nodes", [])[:3]:
        md = getattr(node, "metadata", {}) or {}
        context_list.append(f"{md.get('code', '')}: {md.get('description', '')}")
    context_text = "\n".join(context_list)
    prompt = "\n".join([
        SYSTEM_PROMPT,
        f"User symptoms: '{cleaned}'",
        f"Relevant ICD-10 context:\n{context_text}",
    ])
    prompt = truncate_prompt_llama(prompt, max_tokens=2048)
    msg = "✏️ Prompt built"
    log.append(msg)
    print(msg, flush=True)
    yield history, None, "\n".join(log)

    # Call LLM
    response = llm(prompt=prompt)
    raw = response
    # Unwrap CompletionResponse-style objects if the LLM wrapper returns one
    if hasattr(raw, "text"):
        raw = raw.text
    elif hasattr(raw, "content"):
        raw = raw.content
    # llama_cpp returns a completion dict; pull out the generated text
    if isinstance(raw, dict) and "choices" in raw:
        raw = raw["choices"][0].get("text", "")
    # Ensure we end up with a dict, falling back to the raw text as follow-up
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except Exception:
            raw = {"diagnoses": [], "confidences": [], "follow_up": raw}

    assistant_msg = format_response_for_user(raw)
    history = history + [{"role": "assistant", "content": assistant_msg}]
    msg = "✅ Final response appended"
    log.append(msg)
    print(msg, flush=True)
    yield history, raw, "\n".join(log)
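
# For reference, on_submit coerces model output toward the shape below before
# handing it to format_response_for_user. The values are illustrative only
# (made up for this sketch), not real model output:
#
#   {
#       "diagnoses": ["R51.9", "G43.909"],
#       "confidences": [82, 64],
#       "follow_up": "Does the headache get worse in bright light?"
#   }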

# ========== Gradio UI ==========
with gr.Blocks(theme="default") as demo:
    gr.Markdown(
        """
        # 🏥 Medical Symptom to ICD-10 Code Assistant

        Describe symptoms by typing or speaking. The debug log updates live below.
        """
    )
    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Type your symptoms",
                placeholder="I'm feeling under the weather...",
                lines=3,
            )
            microphone = gr.Audio(
                sources=["microphone"],
                streaming=True,
                type="numpy",
                label="Or speak your symptoms...",
            )
            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear Chat", variant="secondary")
            chatbot = gr.Chatbot(
                label="Medical Consultation",
                height=500,
                type="messages",
            )
            json_output = gr.JSON(label="Diagnosis JSON")
            debug_box = gr.Textbox(label="Debug log", lines=10)
        with gr.Column(scale=1):
            with gr.Accordion("API Keys (optional)", open=False):
                api_key = gr.Textbox(label="OpenAI Key", type="password")
                model_selector = gr.Dropdown(
                    choices=["OpenAI", "Modal", "Anthropic", "MistralAI", "Nebius", "Hyperbolic", "SambaNova"],
                    value="OpenAI",
                    label="Model Provider",
                )
                temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")

    # Bindings
    submit_btn.click(
        fn=on_submit,
        inputs=[text_input, chatbot],
        outputs=[chatbot, json_output, debug_box],
        queue=True,
    )
    clear_btn.click(
        lambda: (None, {}, ""),
        None,
        [chatbot, json_output, debug_box],
        queue=False,
    )
    microphone.stream(
        fn=update_live_transcription,
        inputs=[microphone],
        outputs=[text_input],
        queue=True,
    )

    # --- About the Creator ---
    gr.Markdown(
        """
        ---
        ### 👋 About the Creator

        Hi! I'm Graham Paasch, an experienced technology professional!

        🎥 **Check out my YouTube channel** for more tech content:
        [Subscribe to my channel](https://www.youtube.com/channel/UCg3oUjrSYcqsL9rGk1g_lPQ)

        💼 **Looking for a skilled developer?** I'm currently seeking new opportunities!
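
# Minimal smoke test for the handler outside the UI (left commented out so it
# does not run at import time; assumes the model, embeddings, and index above
# loaded successfully; the symptom text is just a sample):
#
#   payload = None
#   for history, payload, debug in on_submit("persistent dry cough at night", []):
#       print(debug)
#   print(payload)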
        View my experience and connect on [LinkedIn](https://www.linkedin.com/in/grahampaasch/)

        ⭐ If you found this tool helpful, please consider:
        - Subscribing to my YouTube channel
        - Connecting on LinkedIn
        - Sharing this tool with others in healthcare tech
        """
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_api=True,
        mcp_server=True,
    )
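
# Note: mcp_server=True requires Gradio's MCP extra (pip install "gradio[mcp]"),
# and share=True opens a public tunnel; both may be worth disabling for
# local-only runs.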