WebashalarForML commited on
Commit
b2ae2a5
·
verified ·
1 Parent(s): 5fb93c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -104
app.py CHANGED
@@ -31,7 +31,7 @@ logger = logging.getLogger("health-agent")
31
  # --- Environment & config -------------------------------------------------
32
  load_dotenv()
33
  from pathlib import Path
34
- REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", "app/reports")).resolve() # e.g. /app/reports/<patient_id>/<file.pdf>
35
  SSRI_FILE = Path(os.getenv("SSRI_FILE", "app/medicationCategories/SSRI_list.txt")).resolve()
36
  MISC_FILE = Path(os.getenv("MISC_FILE", "app/medicationCategories/MISC_list.txt")).resolve()
37
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", None)
@@ -59,64 +59,59 @@ Fix missing quotes, trailing commas, unescaped newlines, stray assistant labels,
59
 
60
  # -------------------- JSON extraction / sanitizer ---------------------------
61
  def extract_json_from_llm_response(raw_response: str) -> dict:
62
- """
63
- Try extracting a JSON object from raw LLM text. Performs common cleanups seen in LLM outputs.
64
- Raises JSONDecodeError if parsing still fails.
65
- """
66
- # --- 1) Pull out the JSON code-block if present ---
67
- md = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
68
- json_string = md.group(1).strip() if md else raw_response
69
-
70
- # --- 2) Trim to the outermost { … } so we drop any prefix/suffix junk ---
71
- first, last = json_string.find('{'), json_string.rfind('}')
72
- if 0 <= first < last:
73
- json_string = json_string[first:last+1]
74
-
75
- # --- 3) PRE-CLEANUP: remove rogue assistant labels, fix boolean quotes ---
76
- json_string = re.sub(r'\b\w+\s*{', '{', json_string)
77
- json_string = re.sub(r'"assistant"\s*:', '', json_string)
78
- json_string = re.sub(r'\b(false|true)"', r'\1', json_string)
79
-
80
- # --- 4) Escape embedded quotes in long string fields (best-effort) ---
81
- def _esc(m):
82
- prefix, body = m.group(1), m.group(2)
83
- return prefix + body.replace('"', r'\"')
84
- json_string = re.sub(
85
- r'("logic"\s*:\s*")([\s\S]+?)(?=",\s*"[A-Za-z_]\w*"\s*:\s*)',
86
- _esc,
87
- json_string
88
- )
89
-
90
- # --- 5) Remove trailing commas before } or ] ---
91
- json_string = re.sub(r',\s*(?=[}\],])', '', json_string)
92
- json_string = re.sub(r',\s*,', ',', json_string)
93
-
94
- # --- 6) Balance braces if obvious excess ---
95
- ob, cb = json_string.count('{'), json_string.count('}')
96
- if cb > ob:
97
- excess = cb - ob
98
- json_string = json_string.rstrip()[:-excess]
99
-
100
- # --- 7) Escape literal newlines inside strings so json.loads can parse ---
101
- def _escape_newlines_in_strings(s: str) -> str:
102
- return re.sub(
103
- r'"((?:[^"\\]|\\.)*?)"',
104
- lambda m: '"' + m.group(1).replace('\n', '\\n').replace('\r', '\\r') + '"',
105
- s,
106
- flags=re.DOTALL
107
  )
108
- json_string = _escape_newlines_in_strings(json_string)
109
 
110
- # Final parse
111
- return json.loads(json_string)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  # -------------------- Utility: Bloatectomy wrapper ------------------------
114
  def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
115
- """
116
- Uses the bloatectomy class to remove duplicates.
117
- style: 'highlight'|'bold'|'remov' ; we use 'remov' to delete duplicates.
118
- Returns cleaned text (single string).
119
- """
120
  try:
121
  b = bloatectomy(text, style=style, output="html")
122
  tokens = getattr(b, "tokens", None)
@@ -129,55 +124,67 @@ def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
129
 
130
  # --------------- Utility: medication extraction (adapted) -----------------
131
  def readDrugs_from_file(path: Path):
132
- if not path.exists():
 
 
 
 
 
 
 
 
 
133
  return {}, []
134
- txt = path.read_text(encoding="utf-8", errors="ignore")
135
- generics = re.findall(r"^(.*?)\|", txt, re.MULTILINE)
136
- generics = [g.lower() for g in generics if g]
137
- lines = [ln.strip().lower() for ln in txt.splitlines() if ln.strip()]
138
- return dict(zip(generics, lines)), generics
139
 
140
  def addToDrugs_line(line: str, drugs_flags: List[int], listing: Dict[str,str], genList: List[str]) -> List[int]:
141
- gen_index = {g:i for i,g in enumerate(genList)}
142
- for generic, pattern_line in listing.items():
143
- try:
144
- if re.search(pattern_line, line, re.I):
145
- idx = gen_index.get(generic)
146
- if idx is not None:
147
- drugs_flags[idx] = 1
148
- except re.error:
149
- continue
150
- return drugs_flags
 
 
 
 
151
 
152
  def extract_medications_from_text(text: str) -> List[str]:
153
- ssri_map, ssri_generics = readDrugs_from_file(SSRI_FILE)
154
- misc_map, misc_generics = readDrugs_from_file(MISC_FILE)
155
- combined_map = {**ssri_map, **misc_map}
156
- combined_generics = []
157
- if ssri_generics:
158
- combined_generics.extend(ssri_generics)
159
- if misc_generics:
160
- combined_generics.extend(misc_generics)
161
-
162
- flags = [0]* len(combined_generics)
163
- meds_found = set()
164
- for ln in text.splitlines():
165
- ln = ln.strip()
166
- if not ln:
167
- continue
168
- if combined_map:
169
- flags = addToDrugs_line(ln, flags, combined_map, combined_generics)
170
- m = re.search(r"\b(Rx|Drug|Medication|Prescribed|Tablet)\s*[:\-]?\s*([A-Za-z0-9\-\s/\.]+)", ln, re.I)
171
- if m:
172
- meds_found.add(m.group(2).strip())
173
- m2 = re.findall(r"\b([A-Z][a-z0-9\-]{2,}\s*(?:[0-9]{1,4}\s*(?:mg|mcg|g|IU))?)", ln)
174
- for s in m2:
175
- if re.search(r"\b(mg|mcg|g|IU)\b", s, re.I):
176
- meds_found.add(s.strip())
177
- for i, f in enumerate(flags):
178
- if f == 1:
179
- meds_found.add(combined_generics[i])
180
- return list(meds_found)
 
 
 
 
181
 
182
  # -------------------- Node prompts --------------------------
183
  PATIENT_NODE_PROMPT = """
@@ -393,6 +400,7 @@ graph_builder.add_edge("condition_loop", END)
393
 
394
  graph = graph_builder.compile()
395
 
 
396
  # -------------------- Flask app & endpoints -------------------------------
397
  BASE_DIR = Path(__file__).resolve().parent
398
  static_folder = BASE_DIR / "static"
@@ -404,12 +412,18 @@ CORS(app) # dev convenience; lock down in production
404
  def serve_frontend():
405
  try:
406
  return app.send_static_file("frontend.html")
407
- except Exception:
 
408
  return "<h3>frontend.html not found in static/ — drop your frontend.html there.</h3>", 404
409
 
410
  @app.route("/process_reports", methods=["POST"])
411
  def process_reports():
412
- data = request.get_json(force=True)
 
 
 
 
 
413
  patient_id = data.get("patient_id")
414
  filenames = data.get("filenames", [])
415
  extra_patient_meta = data.get("patientDetails", {})
@@ -433,9 +447,13 @@ def process_reports():
433
  elements = partition_pdf(filename=str(file_path))
434
  page_text = "\n".join([el.text for el in elements if hasattr(el, "text") and el.text])
435
  except Exception:
436
- logger.exception("Failed to parse PDF %s", file_path)
437
  page_text = ""
438
- cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
 
 
 
 
439
  documents.append({
440
  "filename": fname,
441
  "raw_text": page_text,
@@ -447,7 +465,11 @@ def process_reports():
447
  return jsonify({"error": "no valid documents found"}), 400
448
 
449
  combined_text = "\n\n".join(combined_text_parts)
450
- meds = extract_medications_from_text(combined_text)
 
 
 
 
451
 
452
  initial_state = {
453
  "patient_meta": extra_patient_meta,
@@ -462,7 +484,7 @@ def process_reports():
462
  # Validate and fill placeholders if needed
463
  if not result_state.get("valid", True):
464
  missing = result_state.get("missing", [])
465
- logger.info("Validation failed; missing keys: %s", missing)
466
  if "patientDetails" in missing:
467
  result_state["patientDetails"] = extra_patient_meta or {"name": "", "age": "", "sex": "", "pid": patient_id}
468
  if "reports" in missing:
@@ -497,3 +519,4 @@ def ping():
497
  if __name__ == "__main__":
498
  port = int(os.getenv("PORT", 7860))
499
  app.run(host="0.0.0.0", port=port, debug=True)
 
 
31
  # --- Environment & config -------------------------------------------------
32
  load_dotenv()
33
  from pathlib import Path
34
+ REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT", "reports")).resolve() # e.g. /app/reports/<patient_id>/<file.pdf>
35
  SSRI_FILE = Path(os.getenv("SSRI_FILE", "app/medicationCategories/SSRI_list.txt")).resolve()
36
  MISC_FILE = Path(os.getenv("MISC_FILE", "app/medicationCategories/MISC_list.txt")).resolve()
37
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", None)
 
59
 
60
  # -------------------- JSON extraction / sanitizer ---------------------------
61
def extract_json_from_llm_response(raw_response: str) -> dict:
    """Best-effort extraction of a JSON object from raw LLM output.

    Strips a markdown code fence if present, trims to the outermost
    ``{...}`` span, repairs common LLM artifacts (stray role labels,
    quoted booleans, trailing commas, unescaped newlines inside strings),
    then parses the result.  Logs and re-raises on any failure.
    """
    try:
        # Prefer the fenced ```json ... ``` section when one exists.
        fence = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
        candidate = fence.group(1).strip() if fence else raw_response

        # Keep only the outermost {...} span, discarding prefix/suffix junk.
        start, end = candidate.find('{'), candidate.rfind('}')
        if 0 <= start < end:
            candidate = candidate[start:end + 1]

        # Drop rogue labels glued onto an opening brace and unquote booleans.
        candidate = re.sub(r'\b\w+\s*{', '{', candidate)
        candidate = re.sub(r'"assistant"\s*:', '', candidate)
        candidate = re.sub(r'\b(false|true)"', r'\1', candidate)

        # Escape embedded quotes inside the long "logic" field (best effort).
        def _escape_quotes(match):
            return match.group(1) + match.group(2).replace('"', r'\"')

        candidate = re.sub(
            r'("logic"\s*:\s*")([\s\S]+?)(?=",\s*"[A-Za-z_]\w*"\s*:\s*)',
            _escape_quotes,
            candidate,
        )

        # Remove trailing and doubled commas that break strict JSON.
        candidate = re.sub(r',\s*(?=[}\],])', '', candidate)
        candidate = re.sub(r',\s*,', ',', candidate)

        # Trim obviously unbalanced closing braces from the tail.
        opens, closes = candidate.count('{'), candidate.count('}')
        if closes > opens:
            candidate = candidate.rstrip()[:-(closes - opens)]

        # Escape literal newlines inside string values so json.loads accepts them.
        candidate = re.sub(
            r'"((?:[^"\\]|\\.)*?)"',
            lambda m: '"' + m.group(1).replace('\n', '\\n').replace('\r', '\\r') + '"',
            candidate,
            flags=re.DOTALL,
        )

        return json.loads(candidate)
    except Exception as e:
        logger.error(f"Failed to extract JSON from LLM response: {e}")
        raise
112
 
113
  # -------------------- Utility: Bloatectomy wrapper ------------------------
114
  def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
 
 
 
 
 
115
  try:
116
  b = bloatectomy(text, style=style, output="html")
117
  tokens = getattr(b, "tokens", None)
 
124
 
125
  # --------------- Utility: medication extraction (adapted) -----------------
126
def readDrugs_from_file(path: Path):
    """Load a drug-list file into a lookup structure.

    Each meaningful line has the form ``generic|pattern|...``.  Returns a
    tuple ``(mapping, generics)`` where ``mapping`` maps each lowercased
    generic name to its full lowercased line (later used as a regex
    alternation by the matcher) and ``generics`` preserves file order.

    Returns ``({}, [])`` for a missing file or on any read error.
    """
    try:
        if not path.exists():
            return {}, []
        txt = path.read_text(encoding="utf-8", errors="ignore")
        mapping: Dict[str, str] = {}
        generics: List[str] = []
        # Pair generic and pattern line per-iteration.  The previous
        # dict(zip(generics, lines)) built the two lists independently, so
        # any stray line without a "|" separator shifted every subsequent
        # generic onto the wrong pattern line.
        for raw_line in txt.splitlines():
            line = raw_line.strip().lower()
            if not line or "|" not in line:
                continue
            generic = line.split("|", 1)[0]
            if not generic:
                continue
            mapping[generic] = line
            generics.append(generic)
        return mapping, generics
    except Exception:
        logger.exception(f"Failed to read drugs from file: {path}")
        return {}, []
 
 
 
 
 
138
 
139
def addToDrugs_line(line: str, drugs_flags: List[int], listing: Dict[str, str], genList: List[str]) -> List[int]:
    """Set the flag for every generic whose pattern line matches *line*.

    *listing* maps a generic name to a pattern line used as a regex;
    *genList* fixes each generic's position in *drugs_flags*.  Malformed
    patterns are skipped, and the (possibly updated) flag list is always
    returned, even on unexpected errors.
    """
    try:
        positions = {name: pos for pos, name in enumerate(genList)}
        for name, pattern in listing.items():
            try:
                hit = re.search(pattern, line, re.I)
            except re.error:
                # Malformed pattern in the source file — ignore it.
                continue
            if hit is None:
                continue
            pos = positions.get(name)
            if pos is not None:
                drugs_flags[pos] = 1
        return drugs_flags
    except Exception:
        logger.exception("Error in addToDrugs_line")
        return drugs_flags
154
 
155
def extract_medications_from_text(text: str) -> List[str]:
    """Collect candidate medication names from free-text clinical notes.

    Combines three heuristics: the curated SSRI/MISC drug lists, explicit
    "Rx:/Drug:/Medication:"-style labels, and capitalized tokens followed
    by a dosage unit (mg/mcg/g/IU).  Returns a de-duplicated list; on any
    unexpected failure an empty list is returned instead of raising.
    """
    try:
        ssri_map, ssri_generics = readDrugs_from_file(SSRI_FILE)
        misc_map, misc_generics = readDrugs_from_file(MISC_FILE)
        combined_map = {**ssri_map, **misc_map}
        combined_generics: List[str] = []
        combined_generics.extend(ssri_generics or [])
        combined_generics.extend(misc_generics or [])

        flags = [0] * len(combined_generics)
        found = set()
        for raw in text.splitlines():
            line = raw.strip()
            if not line:
                continue
            # Heuristic 1: curated drug-list patterns.
            if combined_map:
                flags = addToDrugs_line(line, flags, combined_map, combined_generics)
            # Heuristic 2: explicit medication labels.
            labeled = re.search(
                r"\b(Rx|Drug|Medication|Prescribed|Tablet)\s*[:\-]?\s*([A-Za-z0-9\-\s/\.]+)",
                line,
                re.I,
            )
            if labeled:
                found.add(labeled.group(2).strip())
            # Heuristic 3: capitalized words carrying a dosage unit.
            for token in re.findall(r"\b([A-Z][a-z0-9\-]{2,}\s*(?:[0-9]{1,4}\s*(?:mg|mcg|g|IU))?)", line):
                if re.search(r"\b(mg|mcg|g|IU)\b", token, re.I):
                    found.add(token.strip())
        for pos, flag in enumerate(flags):
            if flag == 1:
                found.add(combined_generics[pos])
        return list(found)
    except Exception:
        logger.exception("Failed to extract medications from text")
        return []
188
 
189
  # -------------------- Node prompts --------------------------
190
  PATIENT_NODE_PROMPT = """
 
400
 
401
  graph = graph_builder.compile()
402
 
403
+ # -------------------- Flask app & endpoints -------------------------------
404
  # -------------------- Flask app & endpoints -------------------------------
405
  BASE_DIR = Path(__file__).resolve().parent
406
  static_folder = BASE_DIR / "static"
 
412
  def serve_frontend():
413
  try:
414
  return app.send_static_file("frontend.html")
415
+ except Exception as e:
416
+ logger.error(f"Failed to serve frontend.html: {e}")
417
  return "<h3>frontend.html not found in static/ — drop your frontend.html there.</h3>", 404
418
 
419
  @app.route("/process_reports", methods=["POST"])
420
  def process_reports():
421
+ try:
422
+ data = request.get_json(force=True)
423
+ except Exception as e:
424
+ logger.error(f"Failed to parse JSON request: {e}")
425
+ return jsonify({"error": "Invalid JSON request"}), 400
426
+
427
  patient_id = data.get("patient_id")
428
  filenames = data.get("filenames", [])
429
  extra_patient_meta = data.get("patientDetails", {})
 
447
  elements = partition_pdf(filename=str(file_path))
448
  page_text = "\n".join([el.text for el in elements if hasattr(el, "text") and el.text])
449
  except Exception:
450
+ logger.exception(f"Failed to parse PDF {file_path}")
451
  page_text = ""
452
+ try:
453
+ cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
454
+ except Exception:
455
+ logger.exception("Failed to clean notes with bloatectomy")
456
+ cleaned = page_text
457
  documents.append({
458
  "filename": fname,
459
  "raw_text": page_text,
 
465
  return jsonify({"error": "no valid documents found"}), 400
466
 
467
  combined_text = "\n\n".join(combined_text_parts)
468
+ try:
469
+ meds = extract_medications_from_text(combined_text)
470
+ except Exception:
471
+ logger.exception("Failed to extract medications")
472
+ meds = []
473
 
474
  initial_state = {
475
  "patient_meta": extra_patient_meta,
 
484
  # Validate and fill placeholders if needed
485
  if not result_state.get("valid", True):
486
  missing = result_state.get("missing", [])
487
+ logger.info(f"Validation failed; missing keys: {missing}")
488
  if "patientDetails" in missing:
489
  result_state["patientDetails"] = extra_patient_meta or {"name": "", "age": "", "sex": "", "pid": patient_id}
490
  if "reports" in missing:
 
519
  if __name__ == "__main__":
520
  port = int(os.getenv("PORT", 7860))
521
  app.run(host="0.0.0.0", port=port, debug=True)
522
+