WebashalarForML committed on
Commit 42e73f2 · verified · 1 Parent(s): bc47476

Update app.py

Files changed (1)
  1. app.py +178 -543
app.py CHANGED
@@ -1,117 +1,48 @@
  #!/usr/bin/env python3
- # app.py - Health Reports processing agent (PDF -> cleaned text -> structured JSON)
-
  import os
  import json
  import logging
  import re
  from pathlib import Path
- from typing import List, Dict, Any
- from werkzeug.utils import secure_filename
  from flask import Flask, request, jsonify
  from flask_cors import CORS
  from dotenv import load_dotenv
- from unstructured.partition.pdf import partition_pdf
-
- # Bloatectomy class (as per the source you provided)
  from bloatectomy import bloatectomy
-
- # LLM / agent
  from langchain_groq import ChatGroq
- from langgraph.prebuilt import create_react_agent
-
- # LangGraph imports
- from langgraph.graph import StateGraph, START, END
  from typing_extensions import TypedDict, NotRequired

- # --- Logging ---------------------------------------------------------------
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
- logger = logging.getLogger("health-agent")

- # --- Environment & config -------------------------------------------------
  load_dotenv()
- from pathlib import Path
- REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", "reports")).resolve() # e.g. /app/reports/<patient_id>/<file.pdf>
- SSRI_FILE = Path(os.getenv("SSRI_FILE", "app/medicationCategories/SSRI_list.txt")).resolve()
- MISC_FILE = Path(os.getenv("MISC_FILE", "app/medicationCategories/MISC_list.txt")).resolve()
- GROQ_API_KEY = os.getenv("GROQ_API_KEY", None)
- ALLOWED_EXTENSIONS = {"pdf"}
- # --- LLM setup -------------------------------------------------------------
  llm = ChatGroq(
      model=os.getenv("LLM_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct"),
      temperature=0.0,
-     max_tokens=None,
  )

- # Top-level strict system prompt for report JSON pieces (each node will use a more specific prompt)
- NODE_BASE_INSTRUCTIONS = """
- You are HealthAI — a clinical assistant producing JSON for downstream processing.
- Produce only valid JSON (no extra text). Follow field types exactly. If missing data, return empty strings or empty arrays.
- Be conservative: do not assert diagnoses; provide suggestions and ask physician confirmation where needed.
- """
-
- # Build a generic agent and a JSON resolver agent (to fix broken JSON from LLM)
- agent = create_react_agent(model=llm, tools=[], prompt=NODE_BASE_INSTRUCTIONS)
- agent_json_resolver = create_react_agent(model=llm, tools=[], prompt="""
- You are a JSON fixer. Input: a possibly-malformed JSON-like text. Output: valid JSON only (enclosed in triple backticks).
- Fix missing quotes, trailing commas, unescaped newlines, stray assistant labels, and ensure schema compliance.
- """)
-
- # -------------------- JSON extraction / sanitizer ---------------------------
- def extract_json_from_llm_response(raw_response: str) -> dict:
-     try:
-         # --- 1) Pull out the JSON code-block if present ---
-         md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
-         json_string = md.group(1).strip() if md else raw_response
-
-         # --- 2) Trim to the outermost { … } so we drop any prefix/suffix junk ---
-         first, last = json_string.find('{'), json_string.rfind('}')
-         if 0 <= first < last:
-             json_string = json_string[first:last+1]
-
-         # --- 3) PRE-CLEANUP: remove rogue assistant labels, fix boolean quotes ---
-         json_string = re.sub(r'\b\w+\s*{', '{', json_string)
-         json_string = re.sub(r'"assistant"\s*:', '', json_string)
-         json_string = re.sub(r'\b(false|true)"', r'\1', json_string)
-
-         # --- 4) Escape embedded quotes in long string fields (best-effort) ---
-         def _esc(m):
-             prefix, body = m.group(1), m.group(2)
-             return prefix + body.replace('"', r'\"')
-         json_string = re.sub(
-             r'("logic"\s*:\s*")([\s\S]+?)(?=",\s*"[A-Za-z_]\w*"\s*:\s*)',
-             _esc,
-             json_string
-         )
-
-         # --- 5) Remove trailing commas before } or ] ---
-         json_string = re.sub(r',\s*(?=[}\],])', '', json_string)
-         json_string = re.sub(r',\s*,', ',', json_string)
-
-         # --- 6) Balance braces if obvious excess ---
-         ob, cb = json_string.count('{'), json_string.count('}')
-         if cb > ob:
-             excess = cb - ob
-             json_string = json_string.rstrip()[:-excess]
-
-         # --- 7) Escape literal newlines inside strings so json.loads can parse ---
-         def _escape_newlines_in_strings(s: str) -> str:
-             return re.sub(
-                 r'"((?:[^"\\]|\\.)*?)"',
-                 lambda m: '"' + m.group(1).replace('\n', '\\n').replace('\r', '\\r') + '"',
-                 s,
-                 flags=re.DOTALL
-             )
-         json_string = _escape_newlines_in_strings(json_string)
-
-         # Final parse
-         return json.loads(json_string)
-     except Exception as e:
-         logger.error(f"Failed to extract JSON from LLM response: {e}")
-         raise
-
- # -------------------- Utility: Bloatectomy wrapper ------------------------
  def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
      try:
          b = bloatectomy(text, style=style, output="html")
          tokens = getattr(b, "tokens", None)
@@ -122,480 +53,185 @@ def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
          logger.exception("Bloatectomy cleaning failed; returning original text")
          return text

- # --------------- Utility: medication extraction (adapted) -----------------
- def readDrugs_from_file(path: Path):
-     try:
-         if not path.exists():
-             return {}, []
-         txt = path.read_text(encoding="utf-8", errors="ignore")
-         generics = re.findall(r"^(.*?)\|", txt, re.MULTILINE)
-         generics = [g.lower() for g in generics if g]
-         lines = [ln.strip().lower() for ln in txt.splitlines() if ln.strip()]
-         return dict(zip(generics, lines)), generics
-     except Exception:
-         logger.exception(f"Failed to read drugs from file: {path}")
-         return {}, []
-
- def addToDrugs_line(line: str, drugs_flags: List[int], listing: Dict[str,str], genList: List[str]) -> List[int]:
-     try:
-         gen_index = {g:i for i,g in enumerate(genList)}
-         for generic, pattern_line in listing.items():
-             try:
-                 if re.search(pattern_line, line, re.I):
-                     idx = gen_index.get(generic)
-                     if idx is not None:
-                         drugs_flags[idx] = 1
-             except re.error:
-                 continue
-         return drugs_flags
-     except Exception:
-         logger.exception("Error in addToDrugs_line")
-         return drugs_flags
-
- def extract_medications_from_text(text: str) -> List[str]:
-     try:
-         ssri_map, ssri_generics = readDrugs_from_file(SSRI_FILE)
-         misc_map, misc_generics = readDrugs_from_file(MISC_FILE)
-         combined_map = {**ssri_map, **misc_map}
-         combined_generics = []
-         if ssri_generics:
-             combined_generics.extend(ssri_generics)
-         if misc_generics:
-             combined_generics.extend(misc_generics)
-
-         flags = [0]* len(combined_generics)
-         meds_found = set()
-         for ln in text.splitlines():
-             ln = ln.strip()
-             if not ln:
-                 continue
-             if combined_map:
-                 flags = addToDrugs_line(ln, flags, combined_map, combined_generics)
-             m = re.search(r"\b(Rx|Drug|Medication|Prescribed|Tablet)\s*[:\-]?\s*([A-Za-z0-9\-\s/\.]+)", ln, re.I)
-             if m:
-                 meds_found.add(m.group(2).strip())
-             m2 = re.findall(r"\b([A-Z][a-z0-9\-]{2,}\s*(?:[0-9]{1,4}\s*(?:mg|mcg|g|IU))?)", ln)
-             for s in m2:
-                 if re.search(r"\b(mg|mcg|g|IU)\b", s, re.I):
-                     meds_found.add(s.strip())
-         for i, f in enumerate(flags):
-             if f == 1:
-                 meds_found.add(combined_generics[i])
-         return list(meds_found)
-     except Exception:
-         logger.exception("Failed to extract medications from text")
-         return []
-
- # -------------------- Node prompts --------------------------
- PATIENT_NODE_PROMPT = """
- You will extract patientDetails from the provided document texts.
- Return ONLY JSON with this exact shape:
- { "patientDetails": {"name": "", "age": "", "sex": "", "pid": ""} }
- Fill fields using text evidence or leave empty strings.
  """

- DOCTOR_NODE_PROMPT = """
- You will extract doctorDetails found in the documents.
- Return ONLY JSON with this exact shape:
- { "doctorDetails": {"referredBy": ""} }
- """

- TEST_REPORT_NODE_PROMPT = """
- You will extract per-test structured results from the documents.
- Return ONLY JSON with this exact shape:
- {
-   "reports": [
-     {
-       "testName": "",
-       "dateReported": "",
-       "timeReported": "",
-       "abnormalFindings": [
-         {"investigation": "", "result": 0, "unit": "", "status": "", "referenceValue": ""}
-       ],
-       "interpretation": "",
-       "trends": []
-     }
-   ]
- }
- - Include only findings that are outside reference ranges OR explicitly called 'abnormal' in the report.
- - For result numeric parsing, prefer numeric values; if not numeric, keep original string.
- - Use statuses: Low, High, Borderline, Positive, Negative, Normal.
- """

- ANALYSIS_NODE_PROMPT = """
- You will create an overallAnalysis based on the extracted reports (the agent will give you the 'reports' JSON).
- Return ONLY JSON:
- { "overallAnalysis": { "summary": "", "recommendations": "", "longTermTrends": "", "risk_prediction": "", "drug_interaction": "" } }
- Be conservative, evidence-based, and suggest follow-up steps for physicians.
- """

- CONDITION_LOOP_NODE_PROMPT = """
- Validation and condition node:
- Input: partial JSON (patientDetails, doctorDetails, reports, overallAnalysis).
- Task: Check required keys exist and that each report has at least testName and abnormalFindings list.
- Return ONLY JSON:
- { "valid": true, "missing": [] }
- If missing fields, list keys in 'missing'. Do NOT modify content.
- """

- # -------------------- Node helpers -------------------------
- def call_node_agent(node_prompt: str, payload: dict) -> dict:
-     """
-     Call the generic agent with a targeted node prompt and the payload.
-     Tries to parse JSON. If parsing fails, uses the JSON resolver agent once.
-     """
      try:
-         content = {
-             "prompt": node_prompt,
-             "payload": payload
-         }
-         resp = agent.invoke({"messages": [{"role": "user", "content": json.dumps(content)}]})
-
-         # Extract raw text from AIMessage or other response types
-         raw = None
-         if isinstance(resp, str):
-             raw = resp
-         elif hasattr(resp, "content"): # AIMessage or similar
-             raw = resp.content
-         elif isinstance(resp, dict):
-             msgs = resp.get("messages")
-             if msgs:
-                 last_msg = msgs[-1]
-                 if isinstance(last_msg, str):
-                     raw = last_msg
-                 elif hasattr(last_msg, "content"):
-                     raw = last_msg.content
-                 elif isinstance(last_msg, dict):
-                     raw = last_msg.get("content", "")
-                 else:
-                     raw = str(last_msg)
-             else:
-                 raw = json.dumps(resp)
-         else:
-             raw = str(resp)

-         parsed = extract_json_from_llm_response(raw)
          return parsed

-     except Exception as e:
-         logger.warning("Node agent JSON parse failed: %s. Attempting JSON resolver.", e)
-         try:
-             resolver_prompt = f"Fix this JSON. Input:\n```json\n{raw}\n```\nReturn valid JSON only."
-             r = agent_json_resolver.invoke({"messages": [{"role": "user", "content": resolver_prompt}]})
-
-             rtxt = None
-             if isinstance(r, str):
-                 rtxt = r
-             elif hasattr(r, "content"):
-                 rtxt = r.content
-             elif isinstance(r, dict):
-                 msgs = r.get("messages")
-                 if msgs:
-                     last_msg = msgs[-1]
-                     if isinstance(last_msg, str):
-                         rtxt = last_msg
-                     elif hasattr(last_msg, "content"):
-                         rtxt = last_msg.content
-                     elif isinstance(last_msg, dict):
-                         rtxt = last_msg.get("content", "")
-                     else:
-                         rtxt = str(last_msg)
-                 else:
-                     rtxt = json.dumps(r)
-             else:
-                 rtxt = str(r)
-
-             corrected = extract_json_from_llm_response(rtxt)
-             return corrected
-         except Exception as e2:
-             logger.exception("JSON resolver also failed: %s", e2)
-             return {}
-
- # -------------------- Define LangGraph State schema -------------------------
- class State(TypedDict):
-     patient_meta: NotRequired[Dict[str, Any]]
-     patient_id: str
-     documents: List[Dict[str, Any]]
-     medications: List[str]
-     patientDetails: NotRequired[Dict[str, Any]]
-     doctorDetails: NotRequired[Dict[str, Any]]
-     reports: NotRequired[List[Dict[str, Any]]]
-     overallAnalysis: NotRequired[Dict[str, Any]]
-     valid: NotRequired[bool]
-     missing: NotRequired[List[str]]
-
- # -------------------- Node implementations as LangGraph nodes -------------------------
- def patient_details_node(state: State) -> dict:
-     payload = {
-         "patient_meta": state.get("patient_meta", {}),
-         "documents": state.get("documents", []),
-         "medications": state.get("medications", [])
-     }
-     logger.info("Running patient_details_node")
-     out = call_node_agent(PATIENT_NODE_PROMPT, payload)
-     return {"patientDetails": out.get("patientDetails", {}) if isinstance(out, dict) else {}}
-
- def doctor_details_node(state: State) -> dict:
-     payload = {
-         "documents": state.get("documents", []),
-         "medications": state.get("medications", [])
-     }
-     logger.info("Running doctor_details_node")
-     out = call_node_agent(DOCTOR_NODE_PROMPT, payload)
-     return {"doctorDetails": out.get("doctorDetails", {}) if isinstance(out, dict) else {}}
-
- def test_report_node(state: State) -> dict:
-     payload = {
-         "documents": state.get("documents", []),
-         "medications": state.get("medications", [])
-     }
-     logger.info("Running test_report_node")
-     out = call_node_agent(TEST_REPORT_NODE_PROMPT, payload)
-     return {"reports": out.get("reports", []) if isinstance(out, dict) else []}
-
- def analysis_node(state: State) -> dict:
-     payload = {
-         "patientDetails": state.get("patientDetails", {}),
-         "doctorDetails": state.get("doctorDetails", {}),
-         "reports": state.get("reports", []),
-         "medications": state.get("medications", [])
-     }
-     logger.info("Running analysis_node")
-     out = call_node_agent(ANALYSIS_NODE_PROMPT, payload)
-     return {"overallAnalysis": out.get("overallAnalysis", {}) if isinstance(out, dict) else {}}
-
- def condition_loop_node(state: State) -> dict:
-     payload = {
-         "patientDetails": state.get("patientDetails", {}),
-         "doctorDetails": state.get("doctorDetails", {}),
-         "reports": state.get("reports", []),
-         "overallAnalysis": state.get("overallAnalysis", {})
-     }
-     logger.info("Running condition_loop_node (validation)")
-     out = call_node_agent(CONDITION_LOOP_NODE_PROMPT, payload)
-     if isinstance(out, dict) and "valid" in out:
-         return {"valid": bool(out.get("valid")), "missing": out.get("missing", [])}
-     missing = []
-     if not state.get("patientDetails"):
-         missing.append("patientDetails")
-     if not state.get("reports"):
-         missing.append("reports")
-     return {"valid": len(missing) == 0, "missing": missing}
-
- # -------------------- Build LangGraph StateGraph -------------------------
- graph_builder = StateGraph(State)
-
- graph_builder.add_node("patient_details", patient_details_node)
- graph_builder.add_node("doctor_details", doctor_details_node)
- graph_builder.add_node("test_report", test_report_node)
- graph_builder.add_node("analysis", analysis_node)
- graph_builder.add_node("condition_loop", condition_loop_node)
-
- graph_builder.add_edge(START, "patient_details")
- graph_builder.add_edge("patient_details", "doctor_details")
- graph_builder.add_edge("doctor_details", "test_report")
- graph_builder.add_edge("test_report", "analysis")
- graph_builder.add_edge("analysis", "condition_loop")
- graph_builder.add_edge("condition_loop", END)
-
- graph = graph_builder.compile()
-
- # -------------------- Flask app & endpoints -------------------------------
- # -------------------- Flask app & endpoints -------------------------------
- BASE_DIR = Path(__file__).resolve().parent
- static_folder = BASE_DIR / "static"
- app = Flask(__name__, static_folder=str(static_folder), static_url_path="/static")
- CORS(app) # dev convenience; lock down in production
-
- # serve frontend root
  @app.route("/", methods=["GET"])
412
  def serve_frontend():
 
413
  try:
414
- return app.send_static_file("frontend.html")
415
- except Exception as e:
416
- logger.error(f"Failed to serve frontend.html: {e}")
417
- return "<h3>frontend.html not found in static/ — drop your frontend.html there.</h3>", 404
418
 
419
- @app.route("/process_reports", methods=["POST"])
420
- def process_reports():
421
- try:
422
- data = request.get_json(force=True)
423
- except Exception as e:
424
- logger.error(f"Failed to parse JSON request: {e}")
425
- return jsonify({"error": "Invalid JSON request"}), 400
426
 
427
  patient_id = data.get("patient_id")
428
- filenames = data.get("filenames", [])
429
- extra_patient_meta = data.get("patientDetails", {})
430
 
431
- if not patient_id or not filenames:
432
- return jsonify({"error": "missing patient_id or filenames"}), 400
433
 
434
- patient_folder = REPORTS_ROOT / str(patient_id)
435
- if not patient_folder.exists() or not patient_folder.is_dir():
436
- return jsonify({"error": f"patient folder not found: {patient_folder}"}), 404
437
-
438
- documents = []
439
  combined_text_parts = []
440
-
441
- for fname in filenames:
442
- file_path = patient_folder / fname
443
- if not file_path.exists():
444
- logger.warning("file not found: %s", file_path)
445
- continue
446
- try:
447
- elements = partition_pdf(filename=str(file_path))
448
- page_text = "\n".join([el.text for el in elements if hasattr(el, "text") and el.text])
449
- except Exception:
450
- logger.exception(f"Failed to parse PDF {file_path}")
451
  page_text = ""
452
- try:
453
- cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
454
- except Exception:
455
- logger.exception("Failed to clean notes with bloatectomy")
456
- cleaned = page_text
457
- documents.append({
458
- "filename": fname,
459
- "raw_text": page_text,
460
- "cleaned_text": cleaned
461
- })
462
- combined_text_parts.append(cleaned)
463
-
464
- if not documents:
465
- return jsonify({"error": "no valid documents found"}), 400
466
-
467
- combined_text = "\n\n".join(combined_text_parts)
468
- try:
469
- meds = extract_medications_from_text(combined_text)
470
- except Exception:
471
- logger.exception("Failed to extract medications")
472
- meds = []
473
-
474
- initial_state = {
475
- "patient_meta": extra_patient_meta,
476
- "patient_id": patient_id,
477
- "documents": documents,
478
- "medications": meds
479
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
 
 
 
 
 
 
481
      try:
-         result_state = graph.invoke(initial_state)
-
-         # Validate and fill placeholders if needed
-         if not result_state.get("valid", True):
-             missing = result_state.get("missing", [])
-             logger.info(f"Validation failed; missing keys: {missing}")
-             if "patientDetails" in missing:
-                 result_state["patientDetails"] = extra_patient_meta or {"name": "", "age": "", "sex": "", "pid": patient_id}
-             if "reports" in missing:
-                 result_state["reports"] = []
-             # Re-run analysis node to keep overallAnalysis consistent
-             result_state.update(analysis_node(result_state))
-             # Re-validate
-             cond = condition_loop_node(result_state)
-             result_state.update(cond)
-
-         safe_response = {
-             "patientDetails": result_state.get("patientDetails", {"name": "", "age": "", "sex": "", "pid": patient_id}),
-             "doctorDetails": result_state.get("doctorDetails", {"referredBy": ""}),
-             "reports": result_state.get("reports", []),
-             "overallAnalysis": result_state.get("overallAnalysis", {"summary": "", "recommendations": "", "longTermTrends": ""}),
-             "_pre_extracted_medications": result_state.get("medications", []),
-             "_validation": {
-                 "valid": result_state.get("valid", True),
-                 "missing": result_state.get("missing", [])
-             }
-         }
-         return jsonify(safe_response), 200

      except Exception as e:
-         logger.exception("Node pipeline failed")
-         return jsonify({"error": "Node pipeline failed", "detail": str(e)}), 500
-
- @app.route("/upload_reports", methods=["POST"])
- def upload_reports():
-     """
-     Upload one or more files for a patient.
-
-     Expects multipart/form-data with:
-       - patient_id (form field)
-       - files (one or multiple files; use the same field name 'files' for each file)
-
-     Example curl:
-       curl -X POST http://localhost:7860/upload_reports \
-         -F "patient_id=12345" \
-         -F "files[]=@/path/to/report1.pdf" \
-         -F "files[]=@/path/to/report2.pdf"
-     """
-     try:
-         # patient id can be in form or args (for convenience)
-         patient_id = request.form.get("patient_id") or request.args.get("patient_id")
-         if not patient_id:
-             return jsonify({"error": "patient_id form field required"}), 400
-
-         # get uploaded files (support both files and files[] naming)
-         uploaded_files = request.files.getlist("files")
-         if not uploaded_files:
-             # fallback: single file under name 'file'
-             single = request.files.get("file")
-             if single:
-                 uploaded_files = [single]
-
-         if not uploaded_files:
-             return jsonify({"error": "no files uploaded (use form field 'files')"}), 400
-
-         # create patient folder under REPORTS_ROOT/<patient_id>
-         patient_folder = REPORTS_ROOT / str(patient_id)
-         patient_folder.mkdir(parents=True, exist_ok=True)
-
-         saved = []
-         skipped = []
-
-         for file_storage in uploaded_files:
-             orig_name = getattr(file_storage, "filename", "") or ""
-             filename = secure_filename(orig_name)
-             if not filename:
-                 skipped.append({"filename": orig_name, "reason": "invalid filename"})
-                 continue
-
-             # extension check
-             ext = filename.rsplit(".", 1)[1].lower() if "." in filename else ""
-             if ext not in ALLOWED_EXTENSIONS:
-                 skipped.append({"filename": filename, "reason": f"extension '{ext}' not allowed"})
-                 continue
-
-             # avoid overwriting: if collision, add numeric suffix
-             dest = patient_folder / filename
-             if dest.exists():
-                 base, dot, extension = filename.rpartition(".")
-                 # if no base (e.g. ".bashrc") fallback
-                 base = base or filename
-                 i = 1
-                 while True:
-                     candidate = f"{base}__{i}.{extension}" if extension else f"{base}__{i}"
-                     dest = patient_folder / candidate
-                     if not dest.exists():
-                         filename = candidate
-                         break
-                     i += 1
-
-             try:
-                 file_storage.save(str(dest))
-                 saved.append(filename)
-             except Exception as e:
-                 logger.exception("Failed to save uploaded file %s: %s", filename, e)
-                 skipped.append({"filename": filename, "reason": f"save failed: {e}"})
-
-         return jsonify({
-             "patient_id": str(patient_id),
-             "saved": saved,
-             "skipped": skipped,
-             "patient_folder": str(patient_folder)
-         }), 200
-
-     except Exception as exc:
-         logger.exception("Upload failed: %s", exc)
-         return jsonify({"error": "upload failed", "detail": str(exc)}), 500

  @app.route("/ping", methods=["GET"])
601
  def ping():
@@ -604,4 +240,3 @@ def ping():
604
  if __name__ == "__main__":
605
  port = int(os.getenv("PORT", 7860))
606
  app.run(host="0.0.0.0", port=port, debug=True)
607
-
 
  #!/usr/bin/env python3
  import os
  import json
  import logging
  import re
+ from typing import Dict, Any
  from pathlib import Path
+ from unstructured.partition.pdf import partition_pdf
  from flask import Flask, request, jsonify
  from flask_cors import CORS
  from dotenv import load_dotenv
  from bloatectomy import bloatectomy
+ from werkzeug.utils import secure_filename
  from langchain_groq import ChatGroq
  from typing_extensions import TypedDict, NotRequired

+ # --- Logging ---
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+ logger = logging.getLogger("patient-assistant")

+ # --- Load environment ---
  load_dotenv()
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if not GROQ_API_KEY:
+     logger.error("GROQ_API_KEY not set in environment")
+     exit(1)
+
+ # --- Flask app setup ---
+ BASE_DIR = Path(__file__).resolve().parent
+ REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", str(BASE_DIR / "reports")))
+ static_folder = BASE_DIR / "static"
+
+ app = Flask(__name__, static_folder=str(static_folder), static_url_path="/static")
+ CORS(app)
+
+ # --- LLM setup ---
  llm = ChatGroq(
      model=os.getenv("LLM_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct"),
      temperature=0.0,
+     max_tokens=1024,
+     api_key=GROQ_API_KEY,
  )

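The rewritten app is configured entirely through the environment variables read above; a minimal `.env` sketch that load_dotenv() would pick up (all values are placeholders; only GROQ_API_KEY is required, the rest fall back to the defaults visible in the code):

    # .env (placeholder values)
    GROQ_API_KEY=your_groq_api_key
    REPORTS_ROOT=/data/reports
    LLM_MODEL=meta-llama/llama-4-scout-17b-16e-instruct
    PORT=7860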
  def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
+     """Helper function to clean up text using the bloatectomy library."""
      try:
          b = bloatectomy(text, style=style, output="html")
          tokens = getattr(b, "tokens", None)

          logger.exception("Bloatectomy cleaning failed; returning original text")
          return text

+ # --- Agent prompt instructions ---
+ PATIENT_ASSISTANT_PROMPT = """
+ You are a patient assistant helping to analyze medical records and reports. Your goal is to provide a comprehensive response based on the patient's health history and the current conversation.
+
+ Your tasks include:
+ - Analyzing medical records and reports to detect anomalies, redundant tests, or misleading treatments.
+ - Suggesting preventive care based on the overall patient health history.
+ - Optimizing healthcare costs by comparing past visits and treatments, helping patients make smarter choices.
+ - Offering personalized lifestyle recommendations, such as adopting healthier food habits, daily routines, and regular health checks.
+ - Generating a natural, helpful reply to the user.
+
+ You will be provided with the last user message, the conversation history, and a summary of the patient's medical reports. Use this information to give a tailored and informative response.
+
+ STRICT OUTPUT FORMAT (JSON ONLY):
+ Return a single JSON object with the following keys:
+ - assistant_reply: string // a natural language reply to the user (short, helpful, always present)
+ - patientDetails: object // keys may include name, problem, city, contact (update if user shared info)
+ - conversationSummary: string (optional) // short summary of conversation + relevant patient docs
+
+ Rules:
+ - ALWAYS include `assistant_reply` as a non-empty string.
+ - Do NOT produce any text outside the JSON object.
+ - Be concise in `assistant_reply`. If you need more details, ask a targeted follow-up question.
+ - Do not make up information that is not present in the provided medical reports or conversation history.
  """

+ # --- JSON extraction helper ---
+ def extract_json_from_llm_response(raw_response: str) -> dict:
+     """Safely extracts a JSON object from a string that might contain extra text or markdown."""
+     default = {
+         "assistant_reply": "I'm sorry — I couldn't understand that. Could you please rephrase?",
+         "patientDetails": {},
+         "conversationSummary": "",
+     }
+
+     if not raw_response or not isinstance(raw_response, str):
+         return default
+
+     # Find the JSON object, ignoring any markdown code fences
+     m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
+     json_string = m.group(1).strip() if m else raw_response
+
+     # Find the first opening brace and the last closing brace
+     first = json_string.find('{')
+     last = json_string.rfind('}')
+     if first == -1 or last == -1 or first >= last:
+         try:
+             return json.loads(json_string)
+         except Exception:
+             logger.warning("Could not locate JSON braces in LLM output. Falling back to default.")
+             return default
+
+     candidate = json_string[first:last+1]
+     # Remove trailing commas that might cause parsing issues
+     candidate = re.sub(r',\s*(?=[}\]])', '', candidate)
+
      try:
+         parsed = json.loads(candidate)
+     except Exception as e:
+         logger.warning("Failed to parse JSON from LLM output: %s", e)
+         return default
+
+     # Basic validation of the parsed JSON
+     if isinstance(parsed, dict) and "assistant_reply" in parsed and isinstance(parsed["assistant_reply"], str) and parsed["assistant_reply"].strip():
+         parsed.setdefault("patientDetails", {})
+         parsed.setdefault("conversationSummary", "")
          return parsed
+     else:
+         logger.warning("Parsed JSON missing 'assistant_reply' or invalid format. Returning default.")
+         return default
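A quick sketch of how this helper behaves on a typical fenced LLM reply (the input string is made up for illustration):

    raw = '```json\n{"assistant_reply": "Hello! How can I help today?", "patientDetails": {},}\n```'
    parsed = extract_json_from_llm_response(raw)
    # The code fence and the trailing comma are stripped, and the missing optional key is backfilled:
    # parsed["assistant_reply"] == "Hello! How can I help today?"
    # parsed["conversationSummary"] == ""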

+ # --- Flask routes ---
  @app.route("/", methods=["GET"])
  def serve_frontend():
+     """Serves the frontend HTML file."""
      try:
+         return app.send_static_file("frontend2.html")
+     except Exception:
+         return "<h3>frontend2.html not found in static/ — please add your frontend2.html there.</h3>", 404

+ @app.route("/chat", methods=["POST"])
137
+ def chat():
138
+ """Handles the chat conversation with the assistant."""
139
+ data = request.get_json(force=True)
140
+ if not isinstance(data, dict):
141
+ return jsonify({"error": "invalid request body"}), 400
 
142
 
143
  patient_id = data.get("patient_id")
144
+ if not patient_id:
145
+ return jsonify({"error": "patient_id required"}), 400
146
 
147
+ chat_history = data.get("chat_history") or []
148
+ patient_state = data.get("patient_state") or {}
149
 
150
+ # --- Read and parse patient reports ---
151
+ patient_folder = REPORTS_ROOT / f"p_{patient_id}"
 
 
 
152
  combined_text_parts = []
153
+ if patient_folder.exists() and patient_folder.is_dir():
154
+ for fname in sorted(os.listdir(patient_folder)):
155
+ file_path = patient_folder / fname
 
 
 
 
 
 
 
 
156
  page_text = ""
157
+ if partition_pdf is not None and str(file_path).lower().endswith('.pdf'):
158
+ try:
159
+ elements = partition_pdf(filename=str(file_path))
160
+ page_text = "\n".join([el.text for el in elements if hasattr(el, 'text') and el.text])
161
+ except Exception:
162
+ logger.exception("Failed to parse PDF %s", file_path)
163
+ else:
164
+ try:
165
+ page_text = file_path.read_text(encoding='utf-8', errors='ignore')
166
+ except Exception:
167
+ page_text = ""
168
+
169
+ if page_text:
170
+ cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
171
+ if cleaned:
172
+ combined_text_parts.append(cleaned)
173
+
174
+ # --- Prepare the state for the LLM ---
175
+ state = patient_state.copy()
176
+ state["lastUserMessage"] = ""
177
+ if chat_history:
178
+ # Find the last user message
179
+ for msg in reversed(chat_history):
180
+ if msg.get("role") == "user" and msg.get("content"):
181
+ state["lastUserMessage"] = msg["content"]
182
+ break
183
+
184
+ # Update the conversation summary with the parsed documents
185
+ base_summary = state.get("conversationSummary", "") or ""
186
+ docs_summary = "\n\n".join(combined_text_parts)
187
+ if docs_summary:
188
+ state["conversationSummary"] = (base_summary + "\n\n" + docs_summary).strip()
189
+ else:
190
+ state["conversationSummary"] = base_summary
191
+
192
+ # --- Direct LLM Invocation ---
193
+ user_prompt = f"""
194
+ Current patientDetails: {json.dumps(state.get("patientDetails", {}))}
195
+ Current conversationSummary: {state.get("conversationSummary", "")}
196
+ Last user message: {state.get("lastUserMessage", "")}
197
+
198
+ Return ONLY valid JSON with keys: assistant_reply, patientDetails, conversationSummary.
199
+ """
200
 
201
+ messages = [
202
+ {"role": "system", "content": PATIENT_ASSISTANT_PROMPT},
203
+ {"role": "user", "content": user_prompt}
204
+ ]
205
+
206
  try:
207
+ logger.info("Invoking LLM with prepared state and prompt...")
208
+ llm_response = llm.invoke(messages)
209
+ raw_response = ""
210
+ if hasattr(llm_response, "content"):
211
+ raw_response = llm_response.content
212
+ else:
213
+ raw_response = str(llm_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
+ logger.info(f"Raw LLM response: {raw_response}")
216
+ parsed_result = extract_json_from_llm_response(raw_response)
217
+
218
  except Exception as e:
219
+ logger.exception("LLM invocation failed")
220
+ return jsonify({"error": "LLM invocation failed", "detail": str(e)}), 500
221
+
222
+ updated_state = parsed_result or {}
223
+
224
+ assistant_reply = updated_state.get("assistant_reply")
225
+ if not assistant_reply or not isinstance(assistant_reply, str) or not assistant_reply.strip():
226
+ # Fallback to a polite message if the LLM response is invalid or empty
227
+ assistant_reply = "I'm here to help — could you tell me more about your symptoms?"
228
+
229
+ response_payload = {
230
+ "assistant_reply": assistant_reply,
231
+ "updated_state": updated_state,
232
+ }
233
+
234
+ return jsonify(response_payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
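For reference, a minimal client-side sketch of the request shape this route expects (the patient id, history, and state values are hypothetical, and it assumes the default port 7860 configured at the bottom of the file):

    import requests  # client-side only; not a dependency of app.py

    payload = {
        "patient_id": "12345",  # reports are read from REPORTS_ROOT / "p_12345"
        "chat_history": [
            {"role": "user", "content": "I keep getting headaches in the evening."}
        ],
        "patient_state": {"patientDetails": {"name": "Asha"}, "conversationSummary": ""},
    }
    resp = requests.post("http://localhost:7860/chat", json=payload)
    print(resp.json()["assistant_reply"])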
  @app.route("/ping", methods=["GET"])
237
  def ping():
 
240
  if __name__ == "__main__":
241
  port = int(os.getenv("PORT", 7860))
242
  app.run(host="0.0.0.0", port=port, debug=True)