WebashalarForML committed on
Commit
854b6ec
·
verified ·
1 Parent(s): ee2fad9

Upload 27 files

Browse files
Files changed (27) hide show
  1. Diseases Identifiable from Lab Reports.docx +0 -0
  2. Diseases Identifiable from Lab Reports.pdf +0 -0
  3. app2.py +499 -0
  4. lab_reports/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  5. lab_reports/DENGUE-FEVER-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  6. lab_reports/Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  7. lab_reports/Positive-Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  8. lab_reports/THYROID-ANTIBODIES-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  9. lab_reports/THYROID-PROFILE-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  10. medicationCategories/LICENSE +674 -0
  11. medicationCategories/MISC_list.txt +0 -0
  12. medicationCategories/README.md +28 -0
  13. medicationCategories/SSRI_list.txt +6 -0
  14. medicationCategories/finddrugs.ipynb +302 -0
  15. medicationCategories/finddrugs.py +237 -0
  16. reports/p_1234/CBC.pdf +0 -0
  17. reports/p_1234/DENGUE.pdf +0 -0
  18. reports/p_1234/Malaria.pdf +0 -0
  19. reports/p_1234/THYROID.pdf +0 -0
  20. reports/p_4567/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  21. reports/p_4567/DENGUE-FEVER-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  22. reports/p_4567/Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  23. reports/p_4567/Positive-Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  24. reports/p_4567/THYROID-ANTIBODIES-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  25. reports/p_4567/THYROID-PROFILE-test-report-format-example-sample-template-Drlogy-lab-report.pdf +0 -0
  26. req2.txt +190 -0
  27. static/frontend.html +96 -0
Diseases Identifiable from Lab Reports.docx ADDED
Binary file (17.9 kB). View file
 
Diseases Identifiable from Lab Reports.pdf ADDED
Binary file (40.6 kB). View file
 
app2.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # app2.py - Health Reports processing agent (PDF -> cleaned text -> structured JSON)
3
+ # Requires: flask, flask-cors, bloatectomy, unstructured, langgraph, langchain_groq (ChatGroq), python-dotenv
4
+
5
+ import os
6
+ import json
7
+ import logging
8
+ import re
9
+ from pathlib import Path
10
+ from typing import List, Dict, Any
11
+
12
+ from flask import Flask, request, jsonify
13
+ from flask_cors import CORS
14
+ from dotenv import load_dotenv
15
+ from unstructured.partition.pdf import partition_pdf
16
+
17
+ # Bloatectomy class (used below to remove duplicated text from notes)
18
+ from bloatectomy import bloatectomy
19
+
20
+ # LLM / agent
21
+ from langchain_groq import ChatGroq
22
+ from langgraph.prebuilt import create_react_agent
23
+
24
+ # LangGraph imports
25
+ from langgraph.graph import StateGraph, START, END
26
+ from typing_extensions import TypedDict, NotRequired
27
+
28
+ # --- Logging ---------------------------------------------------------------
29
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
30
+ logger = logging.getLogger("health-agent")
31
+
32
+ # --- Environment & config -------------------------------------------------
33
load_dotenv()

# Default to the data folders that ship next to this script instead of one
# developer's absolute Windows paths, so the app starts on any machine.
# Each location can still be overridden through the environment / .env file.
_PROJECT_DIR = Path(__file__).resolve().parent
# Layout: <REPORTS_ROOT>/<patient_id>/<file.pdf>, e.g. /app/reports/<patient_id>/<file.pdf>
REPORTS_ROOT = Path(os.getenv("REPORTS_ROOT") or (_PROJECT_DIR / "reports"))
SSRI_FILE = Path(os.getenv("SSRI_FILE") or (_PROJECT_DIR / "medicationCategories" / "SSRI_list.txt"))
MISC_FILE = Path(os.getenv("MISC_FILE") or (_PROJECT_DIR / "medicationCategories" / "MISC_list.txt"))
GROQ_API_KEY = os.getenv("GROQ_API_KEY", None)
38
+
39
+ # --- LLM setup -------------------------------------------------------------
40
+ llm = ChatGroq(
41
+ model=os.getenv("LLM_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct"),
42
+ temperature=0.0,
43
+ max_tokens=None,
44
+ )
45
+
46
+ # Top-level strict system prompt for report JSON pieces (each node will use a more specific prompt)
47
+ NODE_BASE_INSTRUCTIONS = """
48
+ You are HealthAI — a clinical assistant producing JSON for downstream processing.
49
+ Produce only valid JSON (no extra text). Follow field types exactly. If missing data, return empty strings or empty arrays.
50
+ Be conservative: do not assert diagnoses; provide suggestions and ask physician confirmation where needed.
51
+ """
52
+
53
+ # Build a generic agent and a JSON resolver agent (to fix broken JSON from LLM)
54
+ agent = create_react_agent(model=llm, tools=[], prompt=NODE_BASE_INSTRUCTIONS)
55
+ agent_json_resolver = create_react_agent(model=llm, tools=[], prompt="""
56
+ You are a JSON fixer. Input: a possibly-malformed JSON-like text. Output: valid JSON only (enclosed in triple backticks).
57
+ Fix missing quotes, trailing commas, unescaped newlines, stray assistant labels, and ensure schema compliance.
58
+ """)
59
+
60
+ # -------------------- JSON extraction / sanitizer ---------------------------
61
def _repair_llm_json(text: str) -> str:
    """Apply best-effort textual repairs to JSON-like LLM output.

    These rewrites are heuristics and can damage legitimate content (for
    example a string value containing ``word {``), so they must only be used
    on text that has already failed a plain ``json.loads``.
    """
    # Drop stray labels glued in front of an object (e.g. ``assistant {``).
    text = re.sub(r'\b\w+\s*{', '{', text)
    text = re.sub(r'"assistant"\s*:', '', text)
    # ``true"`` / ``false"`` -> bare boolean.
    text = re.sub(r'\b(false|true)"', r'\1', text)

    def _escape_inner_quotes(match):
        prefix, body = match.group(1), match.group(2)
        # Only escape quotes that are not already escaped, otherwise an
        # existing \" would become \\" and terminate the string early.
        return prefix + re.sub(r'(?<!\\)"', r'\\"', body)

    # Escape embedded quotes inside the long free-text "logic" field.
    text = re.sub(
        r'("logic"\s*:\s*")([\s\S]+?)(?=",\s*"[A-Za-z_]\w*"\s*:\s*)',
        _escape_inner_quotes,
        text,
    )

    # Remove trailing commas before } or ] and collapse doubled commas.
    text = re.sub(r',\s*(?=[}\],])', '', text)
    text = re.sub(r',\s*,', ',', text)

    # Drop surplus closing braces, but only ones that really sit at the end;
    # never chop arbitrary trailing characters.
    surplus = text.count('}') - text.count('{')
    text = text.rstrip()
    while surplus > 0 and text.endswith('}'):
        text = text[:-1].rstrip()
        surplus -= 1

    # Escape literal newlines inside string literals so json.loads accepts them.
    text = re.sub(
        r'"((?:[^"\\]|\\.)*?)"',
        lambda m: '"' + m.group(1).replace('\n', '\\n').replace('\r', '\\r') + '"',
        text,
        flags=re.DOTALL,
    )
    return text


def extract_json_from_llm_response(raw_response: str) -> dict:
    """
    Extract a JSON object from raw LLM text.

    Steps:
      1. If the text contains a ``` / ```json fenced block, use its content.
      2. Trim to the outermost ``{ ... }`` to drop prefix/suffix chatter.
      3. Try ``json.loads`` on that candidate as-is. Valid JSON is returned
         untouched, so the repair heuristics can never corrupt good output.
      4. Only if that fails, apply the best-effort repairs and parse again.

    Returns the parsed value (normally a dict).
    Raises json.JSONDecodeError if parsing still fails after the repairs.
    """
    # --- 1) Pull out the JSON code-block if present ---
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", raw_response)
    candidate = fenced.group(1).strip() if fenced else raw_response

    # --- 2) Trim to the outermost { … } so we drop any prefix/suffix junk ---
    first, last = candidate.find('{'), candidate.rfind('}')
    if 0 <= first < last:
        candidate = candidate[first:last + 1]

    # --- 3) Fast path: the model produced valid JSON ---
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        pass

    # --- 4) Repair and do the final parse (may raise JSONDecodeError) ---
    return json.loads(_repair_llm_json(candidate))
112
+
113
+ # -------------------- Utility: Bloatectomy wrapper ------------------------
114
def clean_notes_with_bloatectomy(text: str, style: str = "remov") -> str:
    """
    Run the text through the bloatectomy class and return the result as one string.

    style: 'highlight'|'bold'|'remov' ; callers here pass 'remov', which is
    meant to delete duplicated passages.
    The cleaned text is the object's ``tokens`` joined with newlines. When the
    object exposes no tokens, or anything raises, the input text is returned
    unchanged (failures are logged, never propagated).
    """
    try:
        cleaner = bloatectomy(text, style=style, output="html")
        pieces = getattr(cleaner, "tokens", None)
        cleaned = "\n".join(pieces) if pieces else text
    except Exception:
        logger.exception("Bloatectomy cleaning failed; returning original text")
        cleaned = text
    return cleaned
129
+
130
+ # --------------- Utility: medication extraction (adapted) -----------------
131
def readDrugs_from_file(path: Path):
    """
    Load a medication list file.

    Each useful line has the form ``generic|brand1|brand2...``. The text
    before the first ``|`` is the generic name; the whole lower-cased line is
    later used as a regex alternation to spot the drug in report text.

    Returns ``(listing, generics)`` where ``listing`` maps generic name ->
    lower-cased line and ``generics`` is the list of generic names in file
    order. Returns ``({}, [])`` when the file does not exist.

    Names and patterns are paired line by line, so a blank line, a line
    without ``|`` or a line with an empty generic name is skipped on its own
    and can no longer shift every following name onto the wrong pattern.
    """
    if not path.exists():
        return {}, []
    txt = path.read_text(encoding="utf-8", errors="ignore")

    listing = {}
    generics = []
    for raw_line in txt.splitlines():
        entry = raw_line.strip().lower()
        if not entry:
            continue
        generic, sep, _ = entry.partition("|")
        generic = generic.strip()
        if not sep or not generic:
            # Not a "generic|..." record (e.g. a heading or comment line).
            continue
        listing[generic] = entry
        generics.append(generic)
    return listing, generics
139
+
140
def addToDrugs_line(line: str, drugs_flags: List[int], listing: Dict[str, str], genList: List[str]) -> List[int]:
    """
    Flag every drug from ``listing`` that is mentioned in ``line``.

    line: one line of report text.
    drugs_flags: 0/1 flags parallel to ``genList``; updated in place.
    listing: generic name -> regex pattern (the drug-list line).
    genList: generic names; a name's position is its index in ``drugs_flags``.

    Returns the same ``drugs_flags`` list object.

    Patterns are matched case-insensitively. Only non-empty matches count: a
    list line with an empty alternative (e.g. ``"name|"``) would otherwise
    match every line through a zero-width match and flag the drug everywhere.
    Patterns that are not valid regular expressions are skipped.
    """
    gen_index = {g: i for i, g in enumerate(genList)}
    for generic, pattern_line in listing.items():
        idx = gen_index.get(generic)
        if idx is None:
            continue
        try:
            mentioned = any(m.group(0) for m in re.finditer(pattern_line, line, re.I))
        except re.error:
            continue
        if mentioned:
            drugs_flags[idx] = 1
    return drugs_flags
151
+
152
def extract_medications_from_text(text: str) -> List[str]:
    """
    Collect candidate medication names from free text.

    Three sources are combined:
      * drugs from the SSRI / MISC list files whose pattern matches a line,
      * whatever follows a cue word (Rx, Drug, Medication, Prescribed, Tablet),
      * capitalised words followed by a dose with a unit (mg, mcg, g, IU).

    Returns the unique names as a sorted list, so the same text always yields
    the same output (the result is fed into LLM prompts).
    """
    ssri_map, ssri_generics = readDrugs_from_file(SSRI_FILE)
    misc_map, misc_generics = readDrugs_from_file(MISC_FILE)
    combined_map = {**ssri_map, **misc_map}
    combined_generics = list(ssri_generics) + list(misc_generics)

    # Compiled once per call instead of once per line.
    cue_re = re.compile(r"\b(Rx|Drug|Medication|Prescribed|Tablet)\s*[:\-]?\s*([A-Za-z0-9\-\s/\.]+)", re.I)
    dosed_name_re = re.compile(r"\b([A-Z][a-z0-9\-]{2,}\s*(?:[0-9]{1,4}\s*(?:mg|mcg|g|IU))?)")
    # The unit may be glued to the number ("500mg"); a plain \b before the
    # unit never matches there because digits are word characters.
    unit_re = re.compile(r"(?:(?<=\d)|\b)(mg|mcg|g|IU)\b", re.I)

    flags = [0] * len(combined_generics)
    meds_found = set()
    for ln in text.splitlines():
        ln = ln.strip()
        if not ln:
            continue
        if combined_map:
            flags = addToDrugs_line(ln, flags, combined_map, combined_generics)
        cue = cue_re.search(ln)
        if cue:
            meds_found.add(cue.group(2).strip())
        for candidate in dosed_name_re.findall(ln):
            if unit_re.search(candidate):
                meds_found.add(candidate.strip())

    meds_found.update(name for name, flag in zip(combined_generics, flags) if flag == 1)
    return sorted(meds_found)
181
+
182
+ # -------------------- Node prompts --------------------------
183
+ PATIENT_NODE_PROMPT = """
184
+ You will extract patientDetails from the provided document texts.
185
+ Return ONLY JSON with this exact shape:
186
+ { "patientDetails": {"name": "", "age": "", "sex": "", "pid": ""} }
187
+ Fill fields using text evidence or leave empty strings.
188
+ """
189
+
190
+ DOCTOR_NODE_PROMPT = """
191
+ You will extract doctorDetails found in the documents.
192
+ Return ONLY JSON with this exact shape:
193
+ { "doctorDetails": {"referredBy": ""} }
194
+ """
195
+
196
+ TEST_REPORT_NODE_PROMPT = """
197
+ You will extract per-test structured results from the documents.
198
+ Return ONLY JSON with this exact shape:
199
+ {
200
+ "reports": [
201
+ {
202
+ "testName": "",
203
+ "dateReported": "",
204
+ "timeReported": "",
205
+ "abnormalFindings": [
206
+ {"investigation": "", "result": 0, "unit": "", "status": "", "referenceValue": ""}
207
+ ],
208
+ "interpretation": "",
209
+ "trends": []
210
+ }
211
+ ]
212
+ }
213
+ - Include only findings that are outside reference ranges OR explicitly called 'abnormal' in the report.
214
+ - For result numeric parsing, prefer numeric values; if not numeric, keep original string.
215
+ - Use statuses: Low, High, Borderline, Positive, Negative, Normal.
216
+ """
217
+
218
# The JSON shape below is shown to the model verbatim, so it must itself be valid JSON.
ANALYSIS_NODE_PROMPT = """
You will create an overallAnalysis based on the extracted reports (the agent will give you the 'reports' JSON).
Return ONLY JSON:
{ "overallAnalysis": { "summary": "", "recommendations": "", "longTermTrends": "", "risk_prediction": "", "drug_interaction": "" } }
Be conservative, evidence-based, and suggest follow-up steps for physicians.
"""
224
+
225
+ CONDITION_LOOP_NODE_PROMPT = """
226
+ Validation and condition node:
227
+ Input: partial JSON (patientDetails, doctorDetails, reports, overallAnalysis).
228
+ Task: Check required keys exist and that each report has at least testName and abnormalFindings list.
229
+ Return ONLY JSON:
230
+ { "valid": true, "missing": [] }
231
+ If missing fields, list keys in 'missing'. Do NOT modify content.
232
+ """
233
+
234
+ # -------------------- Node helpers -------------------------
235
def _agent_response_text(resp):
    """Return the text payload from whatever shape an agent ``invoke`` returned.

    Handles a plain string, an object with a ``content`` attribute (AIMessage
    or similar), and a dict with a ``messages`` list (the last message is
    used). Anything else is stringified.
    """
    if isinstance(resp, str):
        return resp
    if hasattr(resp, "content"):
        return resp.content
    if isinstance(resp, dict):
        msgs = resp.get("messages")
        if not msgs:
            return json.dumps(resp)
        last_msg = msgs[-1]
        if isinstance(last_msg, str):
            return last_msg
        if hasattr(last_msg, "content"):
            return last_msg.content
        if isinstance(last_msg, dict):
            return last_msg.get("content", "")
        return str(last_msg)
    return str(resp)


def call_node_agent(node_prompt: str, payload: dict) -> dict:
    """
    Call the generic agent with a targeted node prompt and the payload.

    The prompt and payload are sent as one JSON-encoded user message. The
    reply is parsed with extract_json_from_llm_response; if parsing fails the
    raw reply is handed to the JSON resolver agent once.

    Returns the parsed JSON, or {} when the agent call itself fails or the
    resolver cannot produce parseable JSON either. Never raises.
    """
    raw = None
    try:
        content = {
            "prompt": node_prompt,
            "payload": payload
        }
        resp = agent.invoke({"messages": [{"role": "user", "content": json.dumps(content)}]})
        raw = _agent_response_text(resp)
        return extract_json_from_llm_response(raw)
    except Exception as e:
        if raw is None:
            # The agent never produced text (e.g. the request failed), so
            # there is nothing for the resolver to repair.
            logger.exception("Node agent call failed before returning any text: %s", e)
            return {}
        logger.warning("Node agent JSON parse failed: %s. Attempting JSON resolver.", e)

    try:
        resolver_prompt = f"Fix this JSON. Input:\n```json\n{raw}\n```\nReturn valid JSON only."
        r = agent_json_resolver.invoke({"messages": [{"role": "user", "content": resolver_prompt}]})
        return extract_json_from_llm_response(_agent_response_text(r))
    except Exception as e2:
        logger.exception("JSON resolver also failed: %s", e2)
        return {}
306
+
307
+ # -------------------- Define LangGraph State schema -------------------------
308
class State(TypedDict):
    """State dictionary passed between the LangGraph nodes of the pipeline."""

    # --- required inputs, supplied by the /process_reports endpoint ---
    patient_id: str
    documents: List[Dict[str, Any]]
    medications: List[str]

    # --- optional input: caller-provided patient details ---
    patient_meta: NotRequired[Dict[str, Any]]

    # --- written by the extraction / analysis nodes ---
    patientDetails: NotRequired[Dict[str, Any]]
    doctorDetails: NotRequired[Dict[str, Any]]
    reports: NotRequired[List[Dict[str, Any]]]
    overallAnalysis: NotRequired[Dict[str, Any]]

    # --- written by the validation node ---
    valid: NotRequired[bool]
    missing: NotRequired[List[str]]
319
+
320
+ # -------------------- Node implementations as LangGraph nodes -------------------------
321
def patient_details_node(state: State) -> dict:
    """Ask the agent for patientDetails; returns {"patientDetails": {...}} ({} if the agent gave none)."""
    node_input = {
        "patient_meta": state.get("patient_meta", {}),
        "documents": state.get("documents", []),
        "medications": state.get("medications", [])
    }
    logger.info("Running patient_details_node")
    answer = call_node_agent(PATIENT_NODE_PROMPT, node_input)
    if not isinstance(answer, dict):
        return {"patientDetails": {}}
    return {"patientDetails": answer.get("patientDetails", {})}
330
+
331
def doctor_details_node(state: State) -> dict:
    """Ask the agent for doctorDetails; returns {"doctorDetails": {...}} ({} if the agent gave none)."""
    node_input = {
        "documents": state.get("documents", []),
        "medications": state.get("medications", [])
    }
    logger.info("Running doctor_details_node")
    answer = call_node_agent(DOCTOR_NODE_PROMPT, node_input)
    if not isinstance(answer, dict):
        return {"doctorDetails": {}}
    return {"doctorDetails": answer.get("doctorDetails", {})}
339
+
340
def test_report_node(state: State) -> dict:
    """Ask the agent for per-test results; returns {"reports": [...]} ([] if the agent gave none)."""
    node_input = {
        "documents": state.get("documents", []),
        "medications": state.get("medications", [])
    }
    logger.info("Running test_report_node")
    answer = call_node_agent(TEST_REPORT_NODE_PROMPT, node_input)
    if not isinstance(answer, dict):
        return {"reports": []}
    return {"reports": answer.get("reports", [])}
348
+
349
def analysis_node(state: State) -> dict:
    """Ask the agent for an overall analysis of the data extracted so far.

    Returns {"overallAnalysis": {...}} ({} if the agent gave none).
    """
    node_input = {
        "patientDetails": state.get("patientDetails", {}),
        "doctorDetails": state.get("doctorDetails", {}),
        "reports": state.get("reports", []),
        "medications": state.get("medications", [])
    }
    logger.info("Running analysis_node")
    answer = call_node_agent(ANALYSIS_NODE_PROMPT, node_input)
    if not isinstance(answer, dict):
        return {"overallAnalysis": {}}
    return {"overallAnalysis": answer.get("overallAnalysis", {})}
359
+
360
def condition_loop_node(state: State) -> dict:
    """Validation node: report whether the assembled result is complete.

    The agent's verdict is used when its reply contains a "valid" key.
    Otherwise a local check flags "patientDetails" and "reports" when they
    are absent or empty. Returns {"valid": bool, "missing": [...]}.
    """
    node_input = {
        "patientDetails": state.get("patientDetails", {}),
        "doctorDetails": state.get("doctorDetails", {}),
        "reports": state.get("reports", []),
        "overallAnalysis": state.get("overallAnalysis", {})
    }
    logger.info("Running condition_loop_node (validation)")
    verdict = call_node_agent(CONDITION_LOOP_NODE_PROMPT, node_input)
    if isinstance(verdict, dict) and "valid" in verdict:
        return {"valid": bool(verdict.get("valid")), "missing": verdict.get("missing", [])}

    # Agent gave no usable verdict: fall back to a minimal local check.
    absent = [key for key in ("patientDetails", "reports") if not state.get(key)]
    return {"valid": not absent, "missing": absent}
377
+
378
+ # -------------------- Build LangGraph StateGraph -------------------------
379
graph_builder = StateGraph(State)

# Nodes in execution order; the pipeline is strictly linear.
_PIPELINE = (
    ("patient_details", patient_details_node),
    ("doctor_details", doctor_details_node),
    ("test_report", test_report_node),
    ("analysis", analysis_node),
    ("condition_loop", condition_loop_node),
)

for _node_name, _node_fn in _PIPELINE:
    graph_builder.add_node(_node_name, _node_fn)

# START -> patient_details -> ... -> condition_loop -> END
_route = [START] + [_node_name for _node_name, _ in _PIPELINE] + [END]
for _src, _dst in zip(_route, _route[1:]):
    graph_builder.add_edge(_src, _dst)

graph = graph_builder.compile()
395
+
396
+ # -------------------- Flask app & endpoints -------------------------------
397
# Static assets (frontend.html) are served from the "static" folder next to this file.
BASE_DIR = Path(__file__).resolve().parent
static_folder = BASE_DIR.joinpath("static")
app = Flask(
    __name__,
    static_url_path="/static",
    static_folder=str(static_folder),
)
CORS(app)  # dev convenience; lock down in production
401
+
402
+ # serve frontend root
403
@app.route("/", methods=["GET"])
def serve_frontend():
    """Serve static/frontend.html, or a short 404 hint when it cannot be served."""
    try:
        page = app.send_static_file("frontend.html")
    except Exception:
        return "<h3>frontend.html not found in static/ — drop your frontend.html there.</h3>", 404
    return page
409
+
410
@app.route("/process_reports", methods=["POST"])
def process_reports():
    """
    Process a patient's PDF reports and return structured JSON.

    Expects a JSON object with:
      patient_id     - name of the patient's folder under REPORTS_ROOT
      filenames      - list of PDF file names inside that folder
      patientDetails - optional caller-supplied patient metadata

    Each PDF is parsed to text, de-duplicated, and fed through the LangGraph
    pipeline together with the medications found in the combined text.

    Responses: 200 with the assembled result; 400 for a malformed request or
    when no listed file could be used; 404 when the patient folder does not
    exist; 500 when the pipeline raises.
    """
    data = request.get_json(force=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    patient_id = data.get("patient_id")
    filenames = data.get("filenames", [])
    extra_patient_meta = data.get("patientDetails", {})

    if not patient_id or not filenames:
        return jsonify({"error": "missing patient_id or filenames"}), 400
    if not isinstance(filenames, list) or not all(isinstance(f, str) for f in filenames):
        return jsonify({"error": "filenames must be a list of strings"}), 400

    # patient_id and filenames are untrusted: resolve them and make sure the
    # result stays inside REPORTS_ROOT / the patient folder, so values such
    # as ".." or absolute paths cannot reach other files.
    reports_root = REPORTS_ROOT.resolve()
    patient_folder = (reports_root / str(patient_id)).resolve()
    if reports_root not in patient_folder.parents:
        return jsonify({"error": "invalid patient_id"}), 400
    if not patient_folder.exists() or not patient_folder.is_dir():
        return jsonify({"error": f"patient folder not found: {patient_folder}"}), 404

    documents = []
    combined_text_parts = []

    for fname in filenames:
        file_path = (patient_folder / fname).resolve()
        if patient_folder not in file_path.parents:
            logger.warning("rejecting filename outside patient folder: %s", fname)
            continue
        if not file_path.exists():
            logger.warning("file not found: %s", file_path)
            continue
        try:
            elements = partition_pdf(filename=str(file_path))
            page_text = "\n".join([el.text for el in elements if hasattr(el, "text") and el.text])
        except Exception:
            # Keep going: an unreadable PDF still yields a (blank) document entry.
            logger.exception("Failed to parse PDF %s", file_path)
            page_text = ""
        cleaned = clean_notes_with_bloatectomy(page_text, style="remov")
        documents.append({
            "filename": fname,
            "raw_text": page_text,
            "cleaned_text": cleaned
        })
        combined_text_parts.append(cleaned)

    if not documents:
        return jsonify({"error": "no valid documents found"}), 400

    combined_text = "\n\n".join(combined_text_parts)
    meds = extract_medications_from_text(combined_text)

    initial_state = {
        "patient_meta": extra_patient_meta,
        "patient_id": patient_id,
        "documents": documents,
        "medications": meds
    }

    try:
        result_state = graph.invoke(initial_state)

        # Validate and fill placeholders if needed
        if not result_state.get("valid", True):
            missing = result_state.get("missing", [])
            logger.info("Validation failed; missing keys: %s", missing)
            if "patientDetails" in missing:
                result_state["patientDetails"] = extra_patient_meta or {"name": "", "age": "", "sex": "", "pid": patient_id}
            if "reports" in missing:
                result_state["reports"] = []
                # Re-run analysis node to keep overallAnalysis consistent
                result_state.update(analysis_node(result_state))
            # Re-validate
            cond = condition_loop_node(result_state)
            result_state.update(cond)

        safe_response = {
            "patientDetails": result_state.get("patientDetails", {"name": "", "age": "", "sex": "", "pid": patient_id}),
            "doctorDetails": result_state.get("doctorDetails", {"referredBy": ""}),
            "reports": result_state.get("reports", []),
            # Fallback mirrors the shape requested in ANALYSIS_NODE_PROMPT.
            "overallAnalysis": result_state.get("overallAnalysis", {
                "summary": "",
                "recommendations": "",
                "longTermTrends": "",
                "risk_prediction": "",
                "drug_interaction": ""
            }),
            "_pre_extracted_medications": result_state.get("medications", []),
            "_validation": {
                "valid": result_state.get("valid", True),
                "missing": result_state.get("missing", [])
            }
        }
        return jsonify(safe_response), 200

    except Exception as e:
        logger.exception("Node pipeline failed")
        return jsonify({"error": "Node pipeline failed", "detail": str(e)}), 500
492
+
493
@app.route("/ping", methods=["GET"])
def ping():
    """Liveness probe: always answers {"status": "ok"}."""
    health = {"status": "ok"}
    return jsonify(health)
496
+
497
if __name__ == "__main__":
    port = int(os.getenv("PORT", 5000))
    # The server listens on all interfaces, and the Werkzeug debugger lets a
    # client execute arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1)
    # rather than always on.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=port, debug=debug)
lab_reports/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (70.8 kB). View file
 
lab_reports/DENGUE-FEVER-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (71.6 kB). View file
 
lab_reports/Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (86.4 kB). View file
 
lab_reports/Positive-Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (85.8 kB). View file
 
lab_reports/THYROID-ANTIBODIES-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (71.2 kB). View file
 
lab_reports/THYROID-PROFILE-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (69.7 kB). View file
 
medicationCategories/LICENSE ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU General Public License is a free, copyleft license for
11
+ software and other kinds of works.
12
+
13
+ The licenses for most software and other practical works are designed
14
+ to take away your freedom to share and change the works. By contrast,
15
+ the GNU General Public License is intended to guarantee your freedom to
16
+ share and change all versions of a program--to make sure it remains free
17
+ software for all its users. We, the Free Software Foundation, use the
18
+ GNU General Public License for most of our software; it applies also to
19
+ any other work released this way by its authors. You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ them if you wish), that you receive source code or can get it if you
26
+ want it, that you can change the software or use pieces of it in new
27
+ free programs, and that you know you can do these things.
28
+
29
+ To protect your rights, we need to prevent others from denying you
30
+ these rights or asking you to surrender the rights. Therefore, you have
31
+ certain responsibilities if you distribute copies of the software, or if
32
+ you modify it: responsibilities to respect the freedom of others.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must pass on to the recipients the same
36
+ freedoms that you received. You must make sure that they, too, receive
37
+ or can get the source code. And you must show them these terms so they
38
+ know their rights.
39
+
40
+ Developers that use the GNU GPL protect your rights with two steps:
41
+ (1) assert copyright on the software, and (2) offer you this License
42
+ giving you legal permission to copy, distribute and/or modify it.
43
+
44
+ For the developers' and authors' protection, the GPL clearly explains
45
+ that there is no warranty for this free software. For both users' and
46
+ authors' sake, the GPL requires that modified versions be marked as
47
+ changed, so that their problems will not be attributed erroneously to
48
+ authors of previous versions.
49
+
50
+ Some devices are designed to deny users access to install or run
51
+ modified versions of the software inside them, although the manufacturer
52
+ can do so. This is fundamentally incompatible with the aim of
53
+ protecting users' freedom to change the software. The systematic
54
+ pattern of such abuse occurs in the area of products for individuals to
55
+ use, which is precisely where it is most unacceptable. Therefore, we
56
+ have designed this version of the GPL to prohibit the practice for those
57
+ products. If such problems arise substantially in other domains, we
58
+ stand ready to extend this provision to those domains in future versions
59
+ of the GPL, as needed to protect the freedom of users.
60
+
61
+ Finally, every program is threatened constantly by software patents.
62
+ States should not allow patents to restrict development and use of
63
+ software on general-purpose computers, but in those that do, we wish to
64
+ avoid the special danger that patents applied to a free program could
65
+ make it effectively proprietary. To prevent this, the GPL assures that
66
+ patents cannot be used to render the program non-free.
67
+
68
+ The precise terms and conditions for copying, distribution and
69
+ modification follow.
70
+
71
+ TERMS AND CONDITIONS
72
+
73
+ 0. Definitions.
74
+
75
+ "This License" refers to version 3 of the GNU General Public License.
76
+
77
+ "Copyright" also means copyright-like laws that apply to other kinds of
78
+ works, such as semiconductor masks.
79
+
80
+ "The Program" refers to any copyrightable work licensed under this
81
+ License. Each licensee is addressed as "you". "Licensees" and
82
+ "recipients" may be individuals or organizations.
83
+
84
+ To "modify" a work means to copy from or adapt all or part of the work
85
+ in a fashion requiring copyright permission, other than the making of an
86
+ exact copy. The resulting work is called a "modified version" of the
87
+ earlier work or a work "based on" the earlier work.
88
+
89
+ A "covered work" means either the unmodified Program or a work based
90
+ on the Program.
91
+
92
+ To "propagate" a work means to do anything with it that, without
93
+ permission, would make you directly or secondarily liable for
94
+ infringement under applicable copyright law, except executing it on a
95
+ computer or modifying a private copy. Propagation includes copying,
96
+ distribution (with or without modification), making available to the
97
+ public, and in some countries other activities as well.
98
+
99
+ To "convey" a work means any kind of propagation that enables other
100
+ parties to make or receive copies. Mere interaction with a user through
101
+ a computer network, with no transfer of a copy, is not conveying.
102
+
103
+ An interactive user interface displays "Appropriate Legal Notices"
104
+ to the extent that it includes a convenient and prominently visible
105
+ feature that (1) displays an appropriate copyright notice, and (2)
106
+ tells the user that there is no warranty for the work (except to the
107
+ extent that warranties are provided), that licensees may convey the
108
+ work under this License, and how to view a copy of this License. If
109
+ the interface presents a list of user commands or options, such as a
110
+ menu, a prominent item in the list meets this criterion.
111
+
112
+ 1. Source Code.
113
+
114
+ The "source code" for a work means the preferred form of the work
115
+ for making modifications to it. "Object code" means any non-source
116
+ form of a work.
117
+
118
+ A "Standard Interface" means an interface that either is an official
119
+ standard defined by a recognized standards body, or, in the case of
120
+ interfaces specified for a particular programming language, one that
121
+ is widely used among developers working in that language.
122
+
123
+ The "System Libraries" of an executable work include anything, other
124
+ than the work as a whole, that (a) is included in the normal form of
125
+ packaging a Major Component, but which is not part of that Major
126
+ Component, and (b) serves only to enable use of the work with that
127
+ Major Component, or to implement a Standard Interface for which an
128
+ implementation is available to the public in source code form. A
129
+ "Major Component", in this context, means a major essential component
130
+ (kernel, window system, and so on) of the specific operating system
131
+ (if any) on which the executable work runs, or a compiler used to
132
+ produce the work, or an object code interpreter used to run it.
133
+
134
+ The "Corresponding Source" for a work in object code form means all
135
+ the source code needed to generate, install, and (for an executable
136
+ work) run the object code and to modify the work, including scripts to
137
+ control those activities. However, it does not include the work's
138
+ System Libraries, or general-purpose tools or generally available free
139
+ programs which are used unmodified in performing those activities but
140
+ which are not part of the work. For example, Corresponding Source
141
+ includes interface definition files associated with source files for
142
+ the work, and the source code for shared libraries and dynamically
143
+ linked subprograms that the work is specifically designed to require,
144
+ such as by intimate data communication or control flow between those
145
+ subprograms and other parts of the work.
146
+
147
+ The Corresponding Source need not include anything that users
148
+ can regenerate automatically from other parts of the Corresponding
149
+ Source.
150
+
151
+ The Corresponding Source for a work in source code form is that
152
+ same work.
153
+
154
+ 2. Basic Permissions.
155
+
156
+ All rights granted under this License are granted for the term of
157
+ copyright on the Program, and are irrevocable provided the stated
158
+ conditions are met. This License explicitly affirms your unlimited
159
+ permission to run the unmodified Program. The output from running a
160
+ covered work is covered by this License only if the output, given its
161
+ content, constitutes a covered work. This License acknowledges your
162
+ rights of fair use or other equivalent, as provided by copyright law.
163
+
164
+ You may make, run and propagate covered works that you do not
165
+ convey, without conditions so long as your license otherwise remains
166
+ in force. You may convey covered works to others for the sole purpose
167
+ of having them make modifications exclusively for you, or provide you
168
+ with facilities for running those works, provided that you comply with
169
+ the terms of this License in conveying all material for which you do
170
+ not control copyright. Those thus making or running the covered works
171
+ for you must do so exclusively on your behalf, under your direction
172
+ and control, on terms that prohibit them from making any copies of
173
+ your copyrighted material outside their relationship with you.
174
+
175
+ Conveying under any other circumstances is permitted solely under
176
+ the conditions stated below. Sublicensing is not allowed; section 10
177
+ makes it unnecessary.
178
+
179
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180
+
181
+ No covered work shall be deemed part of an effective technological
182
+ measure under any applicable law fulfilling obligations under article
183
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184
+ similar laws prohibiting or restricting circumvention of such
185
+ measures.
186
+
187
+ When you convey a covered work, you waive any legal power to forbid
188
+ circumvention of technological measures to the extent such circumvention
189
+ is effected by exercising rights under this License with respect to
190
+ the covered work, and you disclaim any intention to limit operation or
191
+ modification of the work as a means of enforcing, against the work's
192
+ users, your or third parties' legal rights to forbid circumvention of
193
+ technological measures.
194
+
195
+ 4. Conveying Verbatim Copies.
196
+
197
+ You may convey verbatim copies of the Program's source code as you
198
+ receive it, in any medium, provided that you conspicuously and
199
+ appropriately publish on each copy an appropriate copyright notice;
200
+ keep intact all notices stating that this License and any
201
+ non-permissive terms added in accord with section 7 apply to the code;
202
+ keep intact all notices of the absence of any warranty; and give all
203
+ recipients a copy of this License along with the Program.
204
+
205
+ You may charge any price or no price for each copy that you convey,
206
+ and you may offer support or warranty protection for a fee.
207
+
208
+ 5. Conveying Modified Source Versions.
209
+
210
+ You may convey a work based on the Program, or the modifications to
211
+ produce it from the Program, in the form of source code under the
212
+ terms of section 4, provided that you also meet all of these conditions:
213
+
214
+ a) The work must carry prominent notices stating that you modified
215
+ it, and giving a relevant date.
216
+
217
+ b) The work must carry prominent notices stating that it is
218
+ released under this License and any conditions added under section
219
+ 7. This requirement modifies the requirement in section 4 to
220
+ "keep intact all notices".
221
+
222
+ c) You must license the entire work, as a whole, under this
223
+ License to anyone who comes into possession of a copy. This
224
+ License will therefore apply, along with any applicable section 7
225
+ additional terms, to the whole of the work, and all its parts,
226
+ regardless of how they are packaged. This License gives no
227
+ permission to license the work in any other way, but it does not
228
+ invalidate such permission if you have separately received it.
229
+
230
+ d) If the work has interactive user interfaces, each must display
231
+ Appropriate Legal Notices; however, if the Program has interactive
232
+ interfaces that do not display Appropriate Legal Notices, your
233
+ work need not make them do so.
234
+
235
+ A compilation of a covered work with other separate and independent
236
+ works, which are not by their nature extensions of the covered work,
237
+ and which are not combined with it such as to form a larger program,
238
+ in or on a volume of a storage or distribution medium, is called an
239
+ "aggregate" if the compilation and its resulting copyright are not
240
+ used to limit the access or legal rights of the compilation's users
241
+ beyond what the individual works permit. Inclusion of a covered work
242
+ in an aggregate does not cause this License to apply to the other
243
+ parts of the aggregate.
244
+
245
+ 6. Conveying Non-Source Forms.
246
+
247
+ You may convey a covered work in object code form under the terms
248
+ of sections 4 and 5, provided that you also convey the
249
+ machine-readable Corresponding Source under the terms of this License,
250
+ in one of these ways:
251
+
252
+ a) Convey the object code in, or embodied in, a physical product
253
+ (including a physical distribution medium), accompanied by the
254
+ Corresponding Source fixed on a durable physical medium
255
+ customarily used for software interchange.
256
+
257
+ b) Convey the object code in, or embodied in, a physical product
258
+ (including a physical distribution medium), accompanied by a
259
+ written offer, valid for at least three years and valid for as
260
+ long as you offer spare parts or customer support for that product
261
+ model, to give anyone who possesses the object code either (1) a
262
+ copy of the Corresponding Source for all the software in the
263
+ product that is covered by this License, on a durable physical
264
+ medium customarily used for software interchange, for a price no
265
+ more than your reasonable cost of physically performing this
266
+ conveying of source, or (2) access to copy the
267
+ Corresponding Source from a network server at no charge.
268
+
269
+ c) Convey individual copies of the object code with a copy of the
270
+ written offer to provide the Corresponding Source. This
271
+ alternative is allowed only occasionally and noncommercially, and
272
+ only if you received the object code with such an offer, in accord
273
+ with subsection 6b.
274
+
275
+ d) Convey the object code by offering access from a designated
276
+ place (gratis or for a charge), and offer equivalent access to the
277
+ Corresponding Source in the same way through the same place at no
278
+ further charge. You need not require recipients to copy the
279
+ Corresponding Source along with the object code. If the place to
280
+ copy the object code is a network server, the Corresponding Source
281
+ may be on a different server (operated by you or a third party)
282
+ that supports equivalent copying facilities, provided you maintain
283
+ clear directions next to the object code saying where to find the
284
+ Corresponding Source. Regardless of what server hosts the
285
+ Corresponding Source, you remain obligated to ensure that it is
286
+ available for as long as needed to satisfy these requirements.
287
+
288
+ e) Convey the object code using peer-to-peer transmission, provided
289
+ you inform other peers where the object code and Corresponding
290
+ Source of the work are being offered to the general public at no
291
+ charge under subsection 6d.
292
+
293
+ A separable portion of the object code, whose source code is excluded
294
+ from the Corresponding Source as a System Library, need not be
295
+ included in conveying the object code work.
296
+
297
+ A "User Product" is either (1) a "consumer product", which means any
298
+ tangible personal property which is normally used for personal, family,
299
+ or household purposes, or (2) anything designed or sold for incorporation
300
+ into a dwelling. In determining whether a product is a consumer product,
301
+ doubtful cases shall be resolved in favor of coverage. For a particular
302
+ product received by a particular user, "normally used" refers to a
303
+ typical or common use of that class of product, regardless of the status
304
+ of the particular user or of the way in which the particular user
305
+ actually uses, or expects or is expected to use, the product. A product
306
+ is a consumer product regardless of whether the product has substantial
307
+ commercial, industrial or non-consumer uses, unless such uses represent
308
+ the only significant mode of use of the product.
309
+
310
+ "Installation Information" for a User Product means any methods,
311
+ procedures, authorization keys, or other information required to install
312
+ and execute modified versions of a covered work in that User Product from
313
+ a modified version of its Corresponding Source. The information must
314
+ suffice to ensure that the continued functioning of the modified object
315
+ code is in no case prevented or interfered with solely because
316
+ modification has been made.
317
+
318
+ If you convey an object code work under this section in, or with, or
319
+ specifically for use in, a User Product, and the conveying occurs as
320
+ part of a transaction in which the right of possession and use of the
321
+ User Product is transferred to the recipient in perpetuity or for a
322
+ fixed term (regardless of how the transaction is characterized), the
323
+ Corresponding Source conveyed under this section must be accompanied
324
+ by the Installation Information. But this requirement does not apply
325
+ if neither you nor any third party retains the ability to install
326
+ modified object code on the User Product (for example, the work has
327
+ been installed in ROM).
328
+
329
+ The requirement to provide Installation Information does not include a
330
+ requirement to continue to provide support service, warranty, or updates
331
+ for a work that has been modified or installed by the recipient, or for
332
+ the User Product in which it has been modified or installed. Access to a
333
+ network may be denied when the modification itself materially and
334
+ adversely affects the operation of the network or violates the rules and
335
+ protocols for communication across the network.
336
+
337
+ Corresponding Source conveyed, and Installation Information provided,
338
+ in accord with this section must be in a format that is publicly
339
+ documented (and with an implementation available to the public in
340
+ source code form), and must require no special password or key for
341
+ unpacking, reading or copying.
342
+
343
+ 7. Additional Terms.
344
+
345
+ "Additional permissions" are terms that supplement the terms of this
346
+ License by making exceptions from one or more of its conditions.
347
+ Additional permissions that are applicable to the entire Program shall
348
+ be treated as though they were included in this License, to the extent
349
+ that they are valid under applicable law. If additional permissions
350
+ apply only to part of the Program, that part may be used separately
351
+ under those permissions, but the entire Program remains governed by
352
+ this License without regard to the additional permissions.
353
+
354
+ When you convey a copy of a covered work, you may at your option
355
+ remove any additional permissions from that copy, or from any part of
356
+ it. (Additional permissions may be written to require their own
357
+ removal in certain cases when you modify the work.) You may place
358
+ additional permissions on material, added by you to a covered work,
359
+ for which you have or can give appropriate copyright permission.
360
+
361
+ Notwithstanding any other provision of this License, for material you
362
+ add to a covered work, you may (if authorized by the copyright holders of
363
+ that material) supplement the terms of this License with terms:
364
+
365
+ a) Disclaiming warranty or limiting liability differently from the
366
+ terms of sections 15 and 16 of this License; or
367
+
368
+ b) Requiring preservation of specified reasonable legal notices or
369
+ author attributions in that material or in the Appropriate Legal
370
+ Notices displayed by works containing it; or
371
+
372
+ c) Prohibiting misrepresentation of the origin of that material, or
373
+ requiring that modified versions of such material be marked in
374
+ reasonable ways as different from the original version; or
375
+
376
+ d) Limiting the use for publicity purposes of names of licensors or
377
+ authors of the material; or
378
+
379
+ e) Declining to grant rights under trademark law for use of some
380
+ trade names, trademarks, or service marks; or
381
+
382
+ f) Requiring indemnification of licensors and authors of that
383
+ material by anyone who conveys the material (or modified versions of
384
+ it) with contractual assumptions of liability to the recipient, for
385
+ any liability that these contractual assumptions directly impose on
386
+ those licensors and authors.
387
+
388
+ All other non-permissive additional terms are considered "further
389
+ restrictions" within the meaning of section 10. If the Program as you
390
+ received it, or any part of it, contains a notice stating that it is
391
+ governed by this License along with a term that is a further
392
+ restriction, you may remove that term. If a license document contains
393
+ a further restriction but permits relicensing or conveying under this
394
+ License, you may add to a covered work material governed by the terms
395
+ of that license document, provided that the further restriction does
396
+ not survive such relicensing or conveying.
397
+
398
+ If you add terms to a covered work in accord with this section, you
399
+ must place, in the relevant source files, a statement of the
400
+ additional terms that apply to those files, or a notice indicating
401
+ where to find the applicable terms.
402
+
403
+ Additional terms, permissive or non-permissive, may be stated in the
404
+ form of a separately written license, or stated as exceptions;
405
+ the above requirements apply either way.
406
+
407
+ 8. Termination.
408
+
409
+ You may not propagate or modify a covered work except as expressly
410
+ provided under this License. Any attempt otherwise to propagate or
411
+ modify it is void, and will automatically terminate your rights under
412
+ this License (including any patent licenses granted under the third
413
+ paragraph of section 11).
414
+
415
+ However, if you cease all violation of this License, then your
416
+ license from a particular copyright holder is reinstated (a)
417
+ provisionally, unless and until the copyright holder explicitly and
418
+ finally terminates your license, and (b) permanently, if the copyright
419
+ holder fails to notify you of the violation by some reasonable means
420
+ prior to 60 days after the cessation.
421
+
422
+ Moreover, your license from a particular copyright holder is
423
+ reinstated permanently if the copyright holder notifies you of the
424
+ violation by some reasonable means, this is the first time you have
425
+ received notice of violation of this License (for any work) from that
426
+ copyright holder, and you cure the violation prior to 30 days after
427
+ your receipt of the notice.
428
+
429
+ Termination of your rights under this section does not terminate the
430
+ licenses of parties who have received copies or rights from you under
431
+ this License. If your rights have been terminated and not permanently
432
+ reinstated, you do not qualify to receive new licenses for the same
433
+ material under section 10.
434
+
435
+ 9. Acceptance Not Required for Having Copies.
436
+
437
+ You are not required to accept this License in order to receive or
438
+ run a copy of the Program. Ancillary propagation of a covered work
439
+ occurring solely as a consequence of using peer-to-peer transmission
440
+ to receive a copy likewise does not require acceptance. However,
441
+ nothing other than this License grants you permission to propagate or
442
+ modify any covered work. These actions infringe copyright if you do
443
+ not accept this License. Therefore, by modifying or propagating a
444
+ covered work, you indicate your acceptance of this License to do so.
445
+
446
+ 10. Automatic Licensing of Downstream Recipients.
447
+
448
+ Each time you convey a covered work, the recipient automatically
449
+ receives a license from the original licensors, to run, modify and
450
+ propagate that work, subject to this License. You are not responsible
451
+ for enforcing compliance by third parties with this License.
452
+
453
+ An "entity transaction" is a transaction transferring control of an
454
+ organization, or substantially all assets of one, or subdividing an
455
+ organization, or merging organizations. If propagation of a covered
456
+ work results from an entity transaction, each party to that
457
+ transaction who receives a copy of the work also receives whatever
458
+ licenses to the work the party's predecessor in interest had or could
459
+ give under the previous paragraph, plus a right to possession of the
460
+ Corresponding Source of the work from the predecessor in interest, if
461
+ the predecessor has it or can get it with reasonable efforts.
462
+
463
+ You may not impose any further restrictions on the exercise of the
464
+ rights granted or affirmed under this License. For example, you may
465
+ not impose a license fee, royalty, or other charge for exercise of
466
+ rights granted under this License, and you may not initiate litigation
467
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
468
+ any patent claim is infringed by making, using, selling, offering for
469
+ sale, or importing the Program or any portion of it.
470
+
471
+ 11. Patents.
472
+
473
+ A "contributor" is a copyright holder who authorizes use under this
474
+ License of the Program or a work on which the Program is based. The
475
+ work thus licensed is called the contributor's "contributor version".
476
+
477
+ A contributor's "essential patent claims" are all patent claims
478
+ owned or controlled by the contributor, whether already acquired or
479
+ hereafter acquired, that would be infringed by some manner, permitted
480
+ by this License, of making, using, or selling its contributor version,
481
+ but do not include claims that would be infringed only as a
482
+ consequence of further modification of the contributor version. For
483
+ purposes of this definition, "control" includes the right to grant
484
+ patent sublicenses in a manner consistent with the requirements of
485
+ this License.
486
+
487
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
488
+ patent license under the contributor's essential patent claims, to
489
+ make, use, sell, offer for sale, import and otherwise run, modify and
490
+ propagate the contents of its contributor version.
491
+
492
+ In the following three paragraphs, a "patent license" is any express
493
+ agreement or commitment, however denominated, not to enforce a patent
494
+ (such as an express permission to practice a patent or covenant not to
495
+ sue for patent infringement). To "grant" such a patent license to a
496
+ party means to make such an agreement or commitment not to enforce a
497
+ patent against the party.
498
+
499
+ If you convey a covered work, knowingly relying on a patent license,
500
+ and the Corresponding Source of the work is not available for anyone
501
+ to copy, free of charge and under the terms of this License, through a
502
+ publicly available network server or other readily accessible means,
503
+ then you must either (1) cause the Corresponding Source to be so
504
+ available, or (2) arrange to deprive yourself of the benefit of the
505
+ patent license for this particular work, or (3) arrange, in a manner
506
+ consistent with the requirements of this License, to extend the patent
507
+ license to downstream recipients. "Knowingly relying" means you have
508
+ actual knowledge that, but for the patent license, your conveying the
509
+ covered work in a country, or your recipient's use of the covered work
510
+ in a country, would infringe one or more identifiable patents in that
511
+ country that you have reason to believe are valid.
512
+
513
+ If, pursuant to or in connection with a single transaction or
514
+ arrangement, you convey, or propagate by procuring conveyance of, a
515
+ covered work, and grant a patent license to some of the parties
516
+ receiving the covered work authorizing them to use, propagate, modify
517
+ or convey a specific copy of the covered work, then the patent license
518
+ you grant is automatically extended to all recipients of the covered
519
+ work and works based on it.
520
+
521
+ A patent license is "discriminatory" if it does not include within
522
+ the scope of its coverage, prohibits the exercise of, or is
523
+ conditioned on the non-exercise of one or more of the rights that are
524
+ specifically granted under this License. You may not convey a covered
525
+ work if you are a party to an arrangement with a third party that is
526
+ in the business of distributing software, under which you make payment
527
+ to the third party based on the extent of your activity of conveying
528
+ the work, and under which the third party grants, to any of the
529
+ parties who would receive the covered work from you, a discriminatory
530
+ patent license (a) in connection with copies of the covered work
531
+ conveyed by you (or copies made from those copies), or (b) primarily
532
+ for and in connection with specific products or compilations that
533
+ contain the covered work, unless you entered into that arrangement,
534
+ or that patent license was granted, prior to 28 March 2007.
535
+
536
+ Nothing in this License shall be construed as excluding or limiting
537
+ any implied license or other defenses to infringement that may
538
+ otherwise be available to you under applicable patent law.
539
+
540
+ 12. No Surrender of Others' Freedom.
541
+
542
+ If conditions are imposed on you (whether by court order, agreement or
543
+ otherwise) that contradict the conditions of this License, they do not
544
+ excuse you from the conditions of this License. If you cannot convey a
545
+ covered work so as to satisfy simultaneously your obligations under this
546
+ License and any other pertinent obligations, then as a consequence you may
547
+ not convey it at all. For example, if you agree to terms that obligate you
548
+ to collect a royalty for further conveying from those to whom you convey
549
+ the Program, the only way you could satisfy both those terms and this
550
+ License would be to refrain entirely from conveying the Program.
551
+
552
+ 13. Use with the GNU Affero General Public License.
553
+
554
+ Notwithstanding any other provision of this License, you have
555
+ permission to link or combine any covered work with a work licensed
556
+ under version 3 of the GNU Affero General Public License into a single
557
+ combined work, and to convey the resulting work. The terms of this
558
+ License will continue to apply to the part which is the covered work,
559
+ but the special requirements of the GNU Affero General Public License,
560
+ section 13, concerning interaction through a network will apply to the
561
+ combination as such.
562
+
563
+ 14. Revised Versions of this License.
564
+
565
+ The Free Software Foundation may publish revised and/or new versions of
566
+ the GNU General Public License from time to time. Such new versions will
567
+ be similar in spirit to the present version, but may differ in detail to
568
+ address new problems or concerns.
569
+
570
+ Each version is given a distinguishing version number. If the
571
+ Program specifies that a certain numbered version of the GNU General
572
+ Public License "or any later version" applies to it, you have the
573
+ option of following the terms and conditions either of that numbered
574
+ version or of any later version published by the Free Software
575
+ Foundation. If the Program does not specify a version number of the
576
+ GNU General Public License, you may choose any version ever published
577
+ by the Free Software Foundation.
578
+
579
+ If the Program specifies that a proxy can decide which future
580
+ versions of the GNU General Public License can be used, that proxy's
581
+ public statement of acceptance of a version permanently authorizes you
582
+ to choose that version for the Program.
583
+
584
+ Later license versions may give you additional or different
585
+ permissions. However, no additional obligations are imposed on any
586
+ author or copyright holder as a result of your choosing to follow a
587
+ later version.
588
+
589
+ 15. Disclaimer of Warranty.
590
+
591
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599
+
600
+ 16. Limitation of Liability.
601
+
602
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610
+ SUCH DAMAGES.
611
+
612
+ 17. Interpretation of Sections 15 and 16.
613
+
614
+ If the disclaimer of warranty and limitation of liability provided
615
+ above cannot be given local legal effect according to their terms,
616
+ reviewing courts shall apply local law that most closely approximates
617
+ an absolute waiver of all civil liability in connection with the
618
+ Program, unless a warranty or assumption of liability accompanies a
619
+ copy of the Program in return for a fee.
620
+
621
+ END OF TERMS AND CONDITIONS
622
+
623
+ How to Apply These Terms to Your New Programs
624
+
625
+ If you develop a new program, and you want it to be of the greatest
626
+ possible use to the public, the best way to achieve this is to make it
627
+ free software which everyone can redistribute and change under these terms.
628
+
629
+ To do so, attach the following notices to the program. It is safest
630
+ to attach them to the start of each source file to most effectively
631
+ state the exclusion of warranty; and each file should have at least
632
+ the "copyright" line and a pointer to where the full notice is found.
633
+
634
+ {one line to give the program's name and a brief idea of what it does.}
635
+ Copyright (C) {year} {name of author}
636
+
637
+ This program is free software: you can redistribute it and/or modify
638
+ it under the terms of the GNU General Public License as published by
639
+ the Free Software Foundation, either version 3 of the License, or
640
+ (at your option) any later version.
641
+
642
+ This program is distributed in the hope that it will be useful,
643
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
644
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645
+ GNU General Public License for more details.
646
+
647
+ You should have received a copy of the GNU General Public License
648
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
649
+
650
+ Also add information on how to contact you by electronic and paper mail.
651
+
652
+ If the program does terminal interaction, make it output a short
653
+ notice like this when it starts in an interactive mode:
654
+
655
+ {project} Copyright (C) {year} {fullname}
656
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657
+ This is free software, and you are welcome to redistribute it
658
+ under certain conditions; type `show c' for details.
659
+
660
+ The hypothetical commands `show w' and `show c' should show the appropriate
661
+ parts of the General Public License. Of course, your program's commands
662
+ might be different; for a GUI interface, you would use an "about box".
663
+
664
+ You should also get your employer (if you work as a programmer) or school,
665
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
666
+ For more information on this, and how to apply and follow the GNU GPL, see
667
+ <http://www.gnu.org/licenses/>.
668
+
669
+ The GNU General Public License does not permit incorporating your program
670
+ into proprietary programs. If your program is a subroutine library, you
671
+ may consider it more useful to permit linking proprietary applications with
672
+ the library. If this is what you want to do, use the GNU Lesser General
673
+ Public License instead of this License. But first, please read
674
+ <http://www.gnu.org/philosophy/why-not-lgpl.html>.
medicationCategories/MISC_list.txt ADDED
File without changes
medicationCategories/README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ medicationCategories
2
+ ====================
3
+
4
+ %--------------------------------
5
+ Written by Marzyeh Ghassemi, CSAIL, MIT
6
+ Sept 21, 2012
7
+ Updated 13 Nov, 2017
8
+ Please contact the author with errors found.
9
+ mghassem {AT} mit {DOT} edu
10
+ %--------------------------------
11
+
12
+ Quick script to parse out medications from discharge summaries in MIMIC format. Note that this approach is brute force: it uses minimal NLP, and can be vastly improved. (hint, hint)
13
+
14
+ If you use this code, please cite the GitHub project (see below for Bibtex):
15
+ @misc{Ghassemi2012,
16
+ author = {Ghassemi, Marzyeh},
17
+ title = {Discharge Summary Based Pre-Admission (Home) Medication Parser},
18
+ year = {2012},
19
+ publisher = {GitHub},
20
+ journal = {GitHub repository},
21
+ howpublished = {\url{https://github.com/mghassem/medicationCategories}},
22
+ commit = {PASTE THE COMMIT VERSION YOU'RE USING HERE}
23
+ }
24
+
25
+ The script is set up to run on the MIMIC `noteevents` table in a Postgres database. Please refer to the Jupyter Notebook (finddrugs.ipynb) for example usage.
26
+
27
+ Thanks!
28
+ Marzyeh
medicationCategories/SSRI_list.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Citalopram|Celexa
2
+ Escitalopram|Lexapro
3
+ Fluoxetine|Prozac|Sarafem|Serafem|Selfemra
4
+ Fluvoxamine|Luvox
5
+ Paroxetine|Paxil|Pexeva
6
+ Sertraline|Zoloft
medicationCategories/finddrugs.ipynb ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Parse out medications from discharge summaries in MIMIC "
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": null,
13
+ "metadata": {
14
+ "collapsed": true
15
+ },
16
+ "outputs": [],
17
+ "source": [
18
+ "import os\n",
19
+ "import pandas as pd\n",
20
+ "import psycopg2\n",
21
+ "import getpass\n",
22
+ "import re\n",
23
+ "\n",
24
+ "# Import the finddrugs module\n",
25
+ "import finddrugs"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "metadata": {
32
+ "collapsed": true
33
+ },
34
+ "outputs": [],
35
+ "source": [
36
+ "# Create a database connection\n",
37
+ "user = 'postgres'\n",
38
+ "host = 'localhost'\n",
39
+ "dbname = 'mimic'\n",
40
+ "schema = 'mimiciii'"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "# Connect to the database\n",
50
+ "con = psycopg2.connect(dbname=dbname, user=user, host=host, \n",
51
+ " password=getpass.getpass(prompt='Password:'.format(user)))\n",
52
+ "cur = con.cursor()\n",
53
+ "cur.execute('SET search_path to {}'.format(schema))"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "metadata": {
60
+ "collapsed": true
61
+ },
62
+ "outputs": [],
63
+ "source": [
64
+ "# Get the notes to be analysed\n",
65
+ "# NOTE: we are limiting the query to 100 rows here\n",
66
+ "query = \\\n",
67
+ "\"\"\"\n",
68
+ "SELECT n.row_id, n.subject_id, n.hadm_id, n.category, n.description, n.text\n",
69
+ "FROM noteevents n\n",
70
+ "WHERE n.category like 'Discharge summary'\n",
71
+ "LIMIT 100;\n",
72
+ "\"\"\"\n",
73
+ "\n",
74
+ "data = pd.read_sql_query(query,con)"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "data.head()"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
+ "# Search the notes\n",
93
+ "finddrugs.search(data)"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 9,
99
+ "metadata": {},
100
+ "outputs": [
101
+ {
102
+ "data": {
103
+ "text/html": [
104
+ "<div>\n",
105
+ "<style>\n",
106
+ " .dataframe thead tr:only-child th {\n",
107
+ " text-align: right;\n",
108
+ " }\n",
109
+ "\n",
110
+ " .dataframe thead th {\n",
111
+ " text-align: left;\n",
112
+ " }\n",
113
+ "\n",
114
+ " .dataframe tbody tr th {\n",
115
+ " vertical-align: top;\n",
116
+ " }\n",
117
+ "</style>\n",
118
+ "<table border=\"1\" class=\"dataframe\">\n",
119
+ " <thead>\n",
120
+ " <tr style=\"text-align: right;\">\n",
121
+ " <th></th>\n",
122
+ " <th>ROW_ID</th>\n",
123
+ " <th>SUBJECT_ID</th>\n",
124
+ " <th>HADM_ID</th>\n",
125
+ " <th>HIST_FOUND</th>\n",
126
+ " <th>DEPRESSION</th>\n",
127
+ " <th>ADMIT_FOUND</th>\n",
128
+ " <th>DIS_FOUND</th>\n",
129
+ " <th>GEN_DEPRESS_MEDS_FOUND</th>\n",
130
+ " <th>GROUP</th>\n",
131
+ " <th>SSRI</th>\n",
132
+ " <th>MISC</th>\n",
133
+ " <th>citalopram</th>\n",
134
+ " <th>escitalopram</th>\n",
135
+ " <th>fluoxetine</th>\n",
136
+ " <th>fluvoxamine</th>\n",
137
+ " <th>paroxetine</th>\n",
138
+ " <th>sertraline</th>\n",
139
+ " </tr>\n",
140
+ " </thead>\n",
141
+ " <tbody>\n",
142
+ " <tr>\n",
143
+ " <th>0</th>\n",
144
+ " <td>174</td>\n",
145
+ " <td>22532</td>\n",
146
+ " <td>167853</td>\n",
147
+ " <td>0</td>\n",
148
+ " <td>0</td>\n",
149
+ " <td>0</td>\n",
150
+ " <td>0</td>\n",
151
+ " <td>0</td>\n",
152
+ " <td>0</td>\n",
153
+ " <td>0</td>\n",
154
+ " <td>0</td>\n",
155
+ " <td>0</td>\n",
156
+ " <td>0</td>\n",
157
+ " <td>0</td>\n",
158
+ " <td>0</td>\n",
159
+ " <td>0</td>\n",
160
+ " <td>0</td>\n",
161
+ " </tr>\n",
162
+ " <tr>\n",
163
+ " <th>1</th>\n",
164
+ " <td>175</td>\n",
165
+ " <td>13702</td>\n",
166
+ " <td>107527</td>\n",
167
+ " <td>1</td>\n",
168
+ " <td>0</td>\n",
169
+ " <td>1</td>\n",
170
+ " <td>1</td>\n",
171
+ " <td>0</td>\n",
172
+ " <td>3</td>\n",
173
+ " <td>1</td>\n",
174
+ " <td>0</td>\n",
175
+ " <td>0</td>\n",
176
+ " <td>1</td>\n",
177
+ " <td>0</td>\n",
178
+ " <td>0</td>\n",
179
+ " <td>0</td>\n",
180
+ " <td>0</td>\n",
181
+ " </tr>\n",
182
+ " <tr>\n",
183
+ " <th>2</th>\n",
184
+ " <td>176</td>\n",
185
+ " <td>13702</td>\n",
186
+ " <td>167118</td>\n",
187
+ " <td>1</td>\n",
188
+ " <td>1</td>\n",
189
+ " <td>1</td>\n",
190
+ " <td>1</td>\n",
191
+ " <td>0</td>\n",
192
+ " <td>3</td>\n",
193
+ " <td>1</td>\n",
194
+ " <td>0</td>\n",
195
+ " <td>0</td>\n",
196
+ " <td>1</td>\n",
197
+ " <td>0</td>\n",
198
+ " <td>0</td>\n",
199
+ " <td>0</td>\n",
200
+ " <td>0</td>\n",
201
+ " </tr>\n",
202
+ " <tr>\n",
203
+ " <th>3</th>\n",
204
+ " <td>177</td>\n",
205
+ " <td>13702</td>\n",
206
+ " <td>196489</td>\n",
207
+ " <td>1</td>\n",
208
+ " <td>1</td>\n",
209
+ " <td>1</td>\n",
210
+ " <td>1</td>\n",
211
+ " <td>0</td>\n",
212
+ " <td>3</td>\n",
213
+ " <td>1</td>\n",
214
+ " <td>0</td>\n",
215
+ " <td>0</td>\n",
216
+ " <td>1</td>\n",
217
+ " <td>0</td>\n",
218
+ " <td>0</td>\n",
219
+ " <td>0</td>\n",
220
+ " <td>0</td>\n",
221
+ " </tr>\n",
222
+ " <tr>\n",
223
+ " <th>4</th>\n",
224
+ " <td>178</td>\n",
225
+ " <td>26880</td>\n",
226
+ " <td>135453</td>\n",
227
+ " <td>1</td>\n",
228
+ " <td>0</td>\n",
229
+ " <td>1</td>\n",
230
+ " <td>1</td>\n",
231
+ " <td>0</td>\n",
232
+ " <td>3</td>\n",
233
+ " <td>1</td>\n",
234
+ " <td>0</td>\n",
235
+ " <td>1</td>\n",
236
+ " <td>0</td>\n",
237
+ " <td>0</td>\n",
238
+ " <td>0</td>\n",
239
+ " <td>0</td>\n",
240
+ " <td>0</td>\n",
241
+ " </tr>\n",
242
+ " </tbody>\n",
243
+ "</table>\n",
244
+ "</div>"
245
+ ],
246
+ "text/plain": [
247
+ " ROW_ID SUBJECT_ID HADM_ID HIST_FOUND DEPRESSION ADMIT_FOUND \\\n",
248
+ "0 174 22532 167853 0 0 0 \n",
249
+ "1 175 13702 107527 1 0 1 \n",
250
+ "2 176 13702 167118 1 1 1 \n",
251
+ "3 177 13702 196489 1 1 1 \n",
252
+ "4 178 26880 135453 1 0 1 \n",
253
+ "\n",
254
+ " DIS_FOUND GEN_DEPRESS_MEDS_FOUND GROUP SSRI MISC citalopram \\\n",
255
+ "0 0 0 0 0 0 0 \n",
256
+ "1 1 0 3 1 0 0 \n",
257
+ "2 1 0 3 1 0 0 \n",
258
+ "3 1 0 3 1 0 0 \n",
259
+ "4 1 0 3 1 0 1 \n",
260
+ "\n",
261
+ " escitalopram fluoxetine fluvoxamine paroxetine sertraline \n",
262
+ "0 0 0 0 0 0 \n",
263
+ "1 1 0 0 0 0 \n",
264
+ "2 1 0 0 0 0 \n",
265
+ "3 1 0 0 0 0 \n",
266
+ "4 0 0 0 0 0 "
267
+ ]
268
+ },
269
+ "execution_count": 9,
270
+ "metadata": {},
271
+ "output_type": "execute_result"
272
+ }
273
+ ],
274
+ "source": [
275
+ "# load the output to a dataframe\n",
276
+ "medications = pd.read_csv('output.csv')\n",
277
+ "medications.head()"
278
+ ]
279
+ }
280
+ ],
281
+ "metadata": {
282
+ "kernelspec": {
283
+ "display_name": "Python 3",
284
+ "language": "python",
285
+ "name": "python3"
286
+ },
287
+ "language_info": {
288
+ "codemirror_mode": {
289
+ "name": "ipython",
290
+ "version": 3
291
+ },
292
+ "file_extension": ".py",
293
+ "mimetype": "text/x-python",
294
+ "name": "python",
295
+ "nbconvert_exporter": "python",
296
+ "pygments_lexer": "ipython3",
297
+ "version": "3.6.1"
298
+ }
299
+ },
300
+ "nbformat": 4,
301
+ "nbformat_minor": 2
302
+ }
medicationCategories/finddrugs.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+
3
+ #--------------------------------
4
+ # Written by Marzyeh Ghassemi, CSAIL, MIT
5
+ # Sept 21, 2012
6
+ # Updated for Python 3, added Notebook, db connection
7
+ # by Tom J. Pollard 13 Nov, 2017
8
+ # Please contact the author with errors found.
9
+ # mghassem {AT} mit {DOT} edu
10
+ #--------------------------------
11
+
12
+ from __future__ import with_statement
13
+ import nltk
14
+ import os
15
+ import os.path
16
+ import re
17
+ import string
18
+ import sys
19
+ import time
20
+
21
def addToDrugs(line, drugs, listing, genList):
    """
    Flag every drug from ``listing`` that is mentioned in ``line``.

    line:    line of text to search
    drugs:   list of 0/1 flags, one per entry of genList; modified in place
    listing: dict of search terms in (generic: "generic|brand1|brand2") form
    genList: list of all generic keys being searched for; the position of a
             generic in this list is its position in ``drugs``

    Returns ``drugs`` (the same list object that was passed in), with a 1
    written at the position of every generic whose search terms were found.
    The search is case-insensitive.
    """
    # Map each generic name to its slot in the flag list.
    position = dict((generic, index) for index, generic in enumerate(genList))

    for generic, names in listing.items():
        # Drop empty alternatives (blank search string, trailing '|'): an
        # empty alternative matches every line and would flag the drug on
        # any text at all.
        alternatives = [name.strip() for name in names.split("|") if name.strip()]
        if not alternatives:
            continue

        # Require a non-letter (or the line edge) on both sides so that a name
        # embedded in a longer one is not reported, e.g. "citalopram" inside
        # "escitalopram".  Digits are still allowed to follow directly
        # ("citalopram20mg").
        pattern = r"(?<![a-z])(?:" + "|".join(alternatives) + r")(?![a-z])"
        if re.search(pattern, line, re.I):
            drugs[position[generic]] = 1
    return drugs
40
+
41
def readDrugs(f, genList):
    """
    Parse a drug list into a search dictionary.

    f:       open text file with one drug per line, in the form
             "generic|brand1|brand2"
    genList: list of drug groups; the generics read from ``f`` are appended
             to it as ONE new element (a list, in file order)

    Returns a dictionary keyed by the lower-cased generic name whose value is
    the whole lower-cased line ("generic|brand1|brand2"), ready to be used as
    a search string.
    """
    generics = []
    searchStrings = []

    # Build key and value from the SAME line so they can never get out of
    # step: blank lines are skipped and a line without any '|' is accepted as
    # a generic that has no brand names.
    for rawLine in f.read().split("\n"):
        entry = rawLine.strip().lower()
        generic = entry.split("|", 1)[0].strip()
        if not generic:
            continue
        generics.append(generic)
        searchStrings.append(entry)

    genList.append(generics)
    return dict(zip(generics, searchStrings))
57
+
58
def search(NOTES,
           SSRI_FILE = os.path.join(os.getcwd(), "SSRI_list.txt"),
           MISC_FILE = os.path.join(os.getcwd(), "MISC_list.txt"),
           SUMMARY_FILE = "output.csv",
           VERBOSE = False):
    """
    Search the notes for medications and write one CSV row per note.

    NOTES:        dataframe loaded from the noteevents table; the row_id,
                  subject_id, hadm_id and text columns are read
    SSRI_FILE:    list of SSRI drugs to search for
    MISC_FILE:    list of additional drugs to search for; if the file cannot
                  be opened only the SSRI list is used
    SUMMARY_FILE: name of the output file; if it already exists nothing is
                  analysed and the function returns immediately
    VERBOSE:      also print lines that could not be classified and the
                  group chosen for each note

    NB: drug files should have a line for each distinct drug type,
    and drugs should be separated by a vertical bar '|'

    NB: the default SSRI_FILE / MISC_FILE paths are built once, when the
    module is imported, from the working directory at that moment.
    """

    if os.path.isfile(SUMMARY_FILE):
        print('The output file already exists.\n\nRemove the following file or save with a different filename:')
        print(os.path.join(os.getcwd(), SUMMARY_FILE))
        return

    starttime = time.time()

    # Keep a list of all generics we are looking for, one sub-list per file
    genList = []

    # Get the drugs into a structure we can use
    with open(SSRI_FILE) as f:
        SSRI = readDrugs(f, genList)
        print("Using drugs from {}".format(SSRI_FILE))
    try:
        with open(MISC_FILE) as f:
            MISC = readDrugs(f, genList)
            print("Using additional drugs from {}".format(MISC_FILE))
    except (IOError, OSError):
        # The MISC list is optional.  Still register an (empty) group for it:
        # the CSV header always has a MISC column, so every row needs a MISC
        # value or all following columns are shifted by one.
        MISC = None
        genList.append([])
        if VERBOSE:
            print("No additional drugs loaded from {}".format(MISC_FILE))
    flatList = [item for sublist in genList for item in sublist]

    # Slice bounds of each drug "type" (one per file) inside the flat list
    bounds = []
    offset = 0
    for group_generics in genList:
        bounds.append((offset, offset + len(group_generics)))
        offset += len(group_generics)

    # Line heuristics, compiled once instead of once per line of every note.
    # Raw strings: the patterns rely on backslash sequences such as \d and \s.
    header_re = re.compile(r"^((\d|[A-Z])(\.|\)))?\s*([a-zA-Z',\.\-\*\d\[\]\(\) ]+)(:| WERE | IS | ARE |INCLUDED|INCLUDING)", re.I)
    history_re = re.compile(r'med(ical)?\s+hist(ory)?', re.I)
    meds_re = re.compile(r'medication|meds', re.I)
    discharge_re = re.compile(r'disch(arge)?', re.I)
    admit_re = re.compile(r'admission|admitting|home|nh|nmeds|pre(\-|\s)?(hosp|op)|current|previous|outpatient|outpt|outside|^[^a-zA-Z]*med(ication)?(s)?', re.I)
    depression_re = re.compile(r'depression', re.I)
    depression_meds_re = re.compile(r'depression\s+med(ication)?(s)?', re.I)
    uncertain_re = re.compile(r'admission|discharge|transfer', re.I)

    # Limiting the analysis to discharge summaries is done in the SQL query.

    # Write heads and notes to new doc
    with open(SUMMARY_FILE, 'a') as f_out:
        f_out.write('"ROW_ID","SUBJECT_ID","HADM_ID","HIST_FOUND","DEPRESSION","ADMIT_FOUND","DIS_FOUND","GEN_DEPRESS_MEDS_FOUND","GROUP","SSRI","MISC","' \
                    + '","'.join(flatList) + '"\n')

        # Parse each patient record
        print("Reading documents...")

        for note in NOTES.itertuples():
            if note.Index % 100 == 0:
                print("...index: {}. row_id: {}. subject_id: {}. hadm_id: {}. \n".format(note.Index, note.row_id, note.subject_id, note.hadm_id))
                sys.stdout.flush()

            # Reset some per-patient variables
            section = ""
            admitFound = 0      # admission medication section found
            dischargeFound = 0  # discharge medication section found
            histFound = 0       # medical history section found
            depressionHist = 0
            drugsAdmit = [0]*len(flatList)
            drugsDis = [0]*len(flatList)
            general_depression_drugs = 0

            # Read through lines sequentially
            # If this looks like a section header, start looking for drugs
            for line in note.text.split("\n"):

                # Searches for a section header based on heuristics
                if header_re.search(line):
                    newSection = ""

                    # Past Medical History Section
                    if history_re.search(line):
                        newSection = "hist"
                        histFound = 1

                    # Discharge Medication Section
                    elif meds_re.search(line) and discharge_re.search(line):
                        newSection = "discharge"
                        dischargeFound = 1

                    # Admitting Medication Section
                    elif admit_re.search(line) \
                            and (section == "admit" or meds_re.search(line)):
                        newSection = "admit"
                        admitFound = 1

                    # Any other header ends the current section ("" = non-meds)
                    section = newSection

                # If in history section, search for depression
                if section == "hist":
                    if depression_re.search(line):
                        depressionHist = 1

                # If in meds section, look at each line for specific drugs
                elif section == "admit":
                    drugsAdmit = addToDrugs(line, drugsAdmit, SSRI, flatList)
                    if MISC:
                        drugsAdmit = addToDrugs(line, drugsAdmit, MISC, flatList)

                    # Section just has something like 'Depression meds'
                    # NOTE(review): nesting was reconstructed from a
                    # whitespace-collapsed copy of this file; confirm that this
                    # check belongs inside the admit branch.
                    if depression_meds_re.search(line):
                        general_depression_drugs = 1

                # Already in meds section, look at each line for specific drugs
                elif section == "discharge":
                    drugsDis = addToDrugs(line, drugsDis, SSRI, flatList)
                    if MISC:
                        drugsDis = addToDrugs(line, drugsDis, MISC, flatList)

                # A line with information which we are uncertain about...
                elif meds_re.search(line) and uncertain_re.search(line):
                    if VERBOSE:
                        print('?? {}'.format(line))

            group = 0
            # Group 0: Patient has no medications on admission section (or no targeted meds)
            #          and medications on discharge from the list
            if dischargeFound == 1 and (1 in drugsDis) and (admitFound == 0 or not(1 in drugsAdmit)):
                group = 0

            # Group 1: Patient has a medications on admission section with no targeted meds
            #          and no medications on discharge
            elif admitFound == 1 and not(1 in drugsAdmit) and (dischargeFound == 0) and general_depression_drugs == 0:
                group = 1

            # Group 2: Patient has medications on admission section, but none from the list
            #          and no medications on discharge from the list
            elif admitFound == 1 and not(1 in drugsAdmit) and dischargeFound == 1 and not(1 in drugsDis) and general_depression_drugs == 0:
                group = 2

            # Group 3: Patient has medications on admission (at least one from the list)
            elif (1 in drugsAdmit):
                group = 3

            # Nothing matched: the note keeps the default group 0
            elif VERBOSE:
                print('Uncertain about group type for row_id = {}'.format(note.row_id))

            if VERBOSE:
                print('group is {}'.format(group))

            # One flag per drug type: was any drug of that type found on admission?
            member = [int(1 in drugsAdmit[start:stop]) for start, stop in bounds]

            # save items to csv (the per-drug columns are the admission flags)
            leading = [note.row_id, note.subject_id, note.hadm_id, histFound,
                       depressionHist, admitFound, dischargeFound,
                       general_depression_drugs, group] + member
            f_out.write(",".join(map(str, leading)) + "," + ",".join(map(str, drugsAdmit)) + "\n")

    # Print summary of analysis
    stoptime = time.time()
    elapsed = stoptime - starttime
    # A coarse clock can report zero elapsed time for a tiny input.
    rate = round(len(NOTES) / elapsed, 2) if elapsed > 0 else 0.0
    print("Done analyzing {} documents in {} seconds ({} docs/sec)".format(len(NOTES),
        round(elapsed, 2), rate))
    print("Summary file is in {}".format(os.getcwd()))
reports/p_1234/CBC.pdf ADDED
Binary file (70.8 kB). View file
 
reports/p_1234/DENGUE.pdf ADDED
Binary file (71.6 kB). View file
 
reports/p_1234/Malaria.pdf ADDED
Binary file (85.8 kB). View file
 
reports/p_1234/THYROID.pdf ADDED
Binary file (71.2 kB). View file
 
reports/p_4567/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (70.8 kB). View file
 
reports/p_4567/DENGUE-FEVER-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (71.6 kB). View file
 
reports/p_4567/Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (86.4 kB). View file
 
reports/p_4567/Positive-Malaria-parasite-identification-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (85.8 kB). View file
 
reports/p_4567/THYROID-ANTIBODIES-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (71.2 kB). View file
 
reports/p_4567/THYROID-PROFILE-test-report-format-example-sample-template-Drlogy-lab-report.pdf ADDED
Binary file (69.7 kB). View file
 
req2.txt ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.10.0
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.12.15
5
+ aiosignal==1.4.0
6
+ annotated-types==0.7.0
7
+ antlr4-python3-runtime==4.9.3
8
+ anyio==4.10.0
9
+ asgiref==3.9.1
10
+ attrs==25.3.0
11
+ audioop-lts==0.2.2
12
+ backoff==2.2.1
13
+ beautifulsoup4==4.13.4
14
+ bidict==0.23.1
15
+ blinker==1.9.0
16
+ bloatectomy==0.0.12
17
+ cachetools==5.5.2
18
+ certifi==2025.8.3
19
+ cffi==1.17.1
20
+ charset-normalizer==3.4.3
21
+ click==8.2.1
22
+ colorama==0.4.6
23
+ coloredlogs==15.0.1
24
+ contourpy==1.3.3
25
+ cryptography==45.0.6
26
+ cycler==0.12.1
27
+ dataclasses-json==0.6.7
28
+ Deprecated==1.2.18
29
+ distro==1.9.0
30
+ effdet==0.4.1
31
+ emoji==2.14.1
32
+ et_xmlfile==2.0.0
33
+ faiss-cpu==1.12.0
34
+ filelock==3.19.1
35
+ filetype==1.2.0
36
+ Flask==3.1.2
37
+ flask-cors==6.0.1
38
+ Flask-SocketIO==5.5.1
39
+ flatbuffers==25.2.10
40
+ fonttools==4.59.1
41
+ frozenlist==1.7.0
42
+ fsspec==2025.7.0
43
+ ftfy==6.3.1
44
+ google-api-core==2.25.1
45
+ google-auth==2.40.3
46
+ google-cloud-vision==3.10.2
47
+ googleapis-common-protos==1.70.0
48
+ greenlet==3.2.4
49
+ groq==0.31.0
50
+ grpcio==1.74.0
51
+ grpcio-status==1.74.0
52
+ gunicorn==23.0.0
53
+ h11==0.16.0
54
+ html5lib==1.1
55
+ httpcore==1.0.9
56
+ httpx==0.28.1
57
+ httpx-sse==0.4.1
58
+ huggingface-hub==0.34.4
59
+ humanfriendly==10.0
60
+ idna==3.10
61
+ itsdangerous==2.2.0
62
+ Jinja2==3.1.6
63
+ jiter==0.10.0
64
+ joblib==1.5.1
65
+ jsonpatch==1.33
66
+ jsonpointer==3.0.0
67
+ kiwisolver==1.4.9
68
+ langchain==0.3.27
69
+ langchain-community==0.3.27
70
+ langchain-core==0.3.74
71
+ langchain-experimental==0.3.4
72
+ langchain-groq==0.3.7
73
+ langchain-openai==0.3.31
74
+ langchain-text-splitters==0.3.9
75
+ langdetect==1.0.9
76
+ langgraph==0.6.6
77
+ langgraph-checkpoint==2.1.1
78
+ langgraph-prebuilt==0.6.4
79
+ langgraph-sdk==0.2.3
80
+ langsmith==0.4.15
81
+ lxml==6.0.1
82
+ Markdown==3.8.2
83
+ MarkupSafe==3.0.2
84
+ marshmallow==3.26.1
85
+ matplotlib==3.10.5
86
+ mkl-service==2.4.0
87
+ mkl_fft @ file:///C:/Users/dev-admin/mkl/mkl_fft_1730824198951/work
88
+ mkl_random @ file:///C:/Users/dev-admin/mkl/mkl_random_1730824302834/work
89
+ mpmath==1.3.0
90
+ msoffcrypto-tool==5.4.2
91
+ multidict==6.6.4
92
+ mypy_extensions==1.1.0
93
+ networkx==3.5
94
+ nltk==3.9.1
95
+ numpy==2.2.6
96
+ olefile==0.47
97
+ omegaconf==2.3.0
98
+ onnx==1.18.0
99
+ onnxruntime==1.22.1
100
+ open_clip_torch==3.1.0
101
+ openai==1.101.0
102
+ opencv-python==4.12.0.88
103
+ openpyxl==3.1.5
104
+ orjson==3.11.2
105
+ ormsgpack==1.10.0
106
+ packaging==25.0
107
+ pandas==2.3.2
108
+ pdf2image==1.17.0
109
+ pdfminer.six==20250506
110
+ pi_heif==1.1.0
111
+ pikepdf==9.10.2
112
+ pillow==11.3.0
113
+ propcache==0.3.2
114
+ proto-plus==1.26.1
115
+ protobuf==6.32.0
116
+ psutil==7.0.0
117
+ pyasn1==0.6.1
118
+ pyasn1_modules==0.4.2
119
+ pycocotools==2.0.10
120
+ pycparser==2.22
121
+ pydantic==2.11.7
122
+ pydantic-settings==2.10.1
123
+ pydantic_core==2.33.2
124
+ pypandoc==1.15
125
+ pyparsing==3.2.3
126
+ pypdf==6.0.0
127
+ PyPDF2==3.0.1
128
+ pypdfium2==4.30.0
129
+ pyreadline3==3.5.4
130
+ pytesseract==0.3.13
131
+ python-dateutil==2.9.0.post0
132
+ python-docx==1.2.0
133
+ python-dotenv==1.1.1
134
+ python-engineio==4.12.2
135
+ python-iso639==2025.2.18
136
+ python-magic==0.4.27
137
+ python-multipart==0.0.20
138
+ python-oxmsg==0.0.2
139
+ python-pptx==1.0.2
140
+ python-socketio==5.13.0
141
+ pytz==2025.2
142
+ PyYAML==6.0.2
143
+ RapidFuzz==3.13.0
144
+ regex==2025.7.34
145
+ requests==2.32.5
146
+ requests-toolbelt==1.0.0
147
+ rsa==4.9.1
148
+ safetensors==0.6.2
149
+ scikit-learn==1.7.1
150
+ scipy==1.16.1
151
+ sentence-transformers==5.1.0
152
+ setuptools==72.1.0
153
+ simple-websocket==1.1.0
154
+ six==1.17.0
155
+ sniffio==1.3.1
156
+ soupsieve==2.7
157
+ SpeechRecognition==3.14.3
158
+ SQLAlchemy==2.0.43
159
+ standard-aifc==3.13.0
160
+ standard-chunk==3.13.0
161
+ sympy==1.14.0
162
+ tenacity==9.1.2
163
+ threadpoolctl==3.6.0
164
+ tiktoken==0.11.0
165
+ timm==1.0.19
166
+ tokenizers==0.21.4
167
+ torch==2.8.0
168
+ torchvision==0.23.0
169
+ tqdm==4.67.1
170
+ transformers==4.55.3
171
+ typing-inspect==0.9.0
172
+ typing-inspection==0.4.1
173
+ typing_extensions==4.14.1
174
+ tzdata==2025.2
175
+ unstructured==0.18.13
176
+ unstructured-client==0.42.3
177
+ unstructured-inference==1.0.5
178
+ unstructured.pytesseract==0.3.15
179
+ urllib3==2.5.0
180
+ wcwidth==0.2.13
181
+ webencodings==0.5.1
182
+ Werkzeug==3.1.3
183
+ wheel==0.45.1
184
+ wrapt==1.17.3
185
+ wsproto==1.2.0
186
+ xlrd==2.0.2
187
+ xlsxwriter==3.2.5
188
+ xxhash==3.5.0
189
+ yarl==1.20.1
190
+ zstandard==0.24.0
static/frontend.html ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>Health Reports Processor</title>
7
+ <style>
8
+ body {
9
+ font-family: Arial, sans-serif;
10
+ margin: 2rem;
11
+ max-width: 800px;
12
+ }
13
+ label {
14
+ display: block;
15
+ margin-top: 1rem;
16
+ font-weight: bold;
17
+ }
18
+ input, textarea, button {
19
+ width: 100%;
20
+ padding: 0.5rem;
21
+ margin-top: 0.25rem;
22
+ box-sizing: border-box;
23
+ font-size: 1rem;
24
+ }
25
+ button {
26
+ margin-top: 1rem;
27
+ cursor: pointer;
28
+ }
29
+ pre {
30
+ background: #f4f4f4;
31
+ padding: 1rem;
32
+ overflow-x: auto;
33
+ white-space: pre-wrap;
34
+ word-wrap: break-word;
35
+ border: 1px solid #ccc;
36
+ margin-top: 1rem;
37
+ }
38
+ .error {
39
+ color: red;
40
+ margin-top: 1rem;
41
+ }
42
+ </style>
43
+ </head>
44
+ <body>
45
+ <h1>Health Reports Processor</h1>
46
+ <form id="reportForm">
47
+ <label for="patientId">Patient ID:</label>
48
+ <input type="text" id="patientId" name="patientId" required />
49
+
50
+ <label for="filenames">Filenames (comma-separated):</label>
51
+ <input type="text" id="filenames" name="filenames" placeholder="e.g. cbc.pdf, thyroid.pdf" required />
52
+
53
+ <button type="submit">Process Reports</button>
54
+ </form>
55
+
56
+ <div id="result"></div>
57
+
58
+ <script>
59
const form = document.getElementById('reportForm');
const resultDiv = document.getElementById('result');

// Append one element to the result area. The text is assigned through
// textContent so that anything coming from the server or from an exception
// is shown literally and never parsed as HTML.
function appendResult(tagName, text, className) {
  const el = document.createElement(tagName);
  if (className) {
    el.className = className;
  }
  el.textContent = text;
  resultDiv.appendChild(el);
}

function showError(message) {
  resultDiv.textContent = '';
  appendResult('p', message, 'error');
}

// Submit the patient id and the list of report filenames as JSON to
// /process_reports and render either the JSON result or an error message.
form.addEventListener('submit', async (e) => {
  e.preventDefault();
  resultDiv.textContent = '';

  const patientId = form.patientId.value.trim();
  const filenamesRaw = form.filenames.value.trim();
  if (!patientId || !filenamesRaw) {
    showError('Please enter both Patient ID and filenames.');
    return;
  }

  const filenames = filenamesRaw.split(',').map(f => f.trim()).filter(f => f.length > 0);
  // Input made only of separators (e.g. ",") leaves nothing to process.
  if (filenames.length === 0) {
    showError('Please enter both Patient ID and filenames.');
    return;
  }

  try {
    const response = await fetch('/process_reports', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ patient_id: patientId, filenames: filenames })
    });

    if (!response.ok) {
      // Prefer the server's own error text, but an error page is not
      // necessarily JSON: fall back to the HTTP status text in that case.
      let detail = response.statusText;
      try {
        const errorData = await response.json();
        if (errorData && errorData.error) {
          detail = errorData.error;
        }
      } catch (parseErr) {
        // Body was not JSON; keep the status text.
      }
      showError(`Error: ${detail}`);
      return;
    }

    const data = await response.json();
    appendResult('h2', 'Processed Result');
    appendResult('pre', JSON.stringify(data, null, 2));
  } catch (err) {
    showError(`Request failed: ${err.message}`);
  }
});
94
+ </script>
95
+ </body>
96
+ </html>