adrienbrdne committed on
Commit
f8ac349
·
verified ·
1 Parent(s): fa98115

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +212 -0
  2. config.json +19 -0
  3. requirements.txt +16 -0
app.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from langchain_community.graphs import Neo4jGraph
4
+ import pandas as pd
5
+ import json
6
+ import time
7
+
8
+ from ki_gen.planner import build_planner_graph
9
+ from ki_gen.utils import init_app, memory
10
+ from ki_gen.prompts import get_initial_prompt
11
+
12
+ from neo4j import GraphDatabase
13
+
14
# Configure the Streamlit page before any other Streamlit call in this script
st.set_page_config(layout="wide", page_title="Key Issue Generator")

# Neo4j connection settings; the password is read from the environment
NEO4J_URI = "neo4j+s://4985272f.databases.neo4j.io"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = os.environ.get("neo4j_password")

# Keys for the LLM / tracing services; each is None when its variable is unset
OPENAI_API_KEY = os.environ.get("openai_api_key")
GROQ_API_KEY = os.environ.get("groq_api_key")
LANGSMITH_API_KEY = os.environ.get("langsmith_api_key")
26
+
27
def verify_neo4j_connectivity():
    """Probe the configured Neo4j database.

    Returns:
        The value returned by ``driver.verify_connectivity()`` when the probe
        succeeds, or a string of the form ``"Error: <message>"`` when any
        exception is raised while creating, using, or closing the driver.
    """
    credentials = (NEO4J_USERNAME, NEO4J_PASSWORD)
    try:
        # The driver exists only for this probe; the context manager closes it.
        with GraphDatabase.driver(NEO4J_URI, auth=credentials) as probe:
            outcome = probe.verify_connectivity()
    except Exception as exc:
        return f"Error: {str(exc)}"
    return outcome
34
+
35
def load_config():
    """Build the run configuration, including a live Neo4j graph handle.

    Returns:
        ``{"configurable": settings}`` where ``settings`` holds the fixed
        pipeline parameters plus a ``"graph"`` entry containing a
        ``Neo4jGraph`` instance, or ``None`` (after reporting the problem via
        ``st.error``) when the ``Neo4jGraph`` cannot be created.
    """
    # The same model name is used for planning, summarizing and evaluation.
    model_name = "deepseek-r1-distill-llama-70b"

    settings = {
        "main_llm": model_name,
        "plan_method": "generation",
        "use_detailed_query": False,
        "cypher_gen_method": "guided",
        "validate_cypher": False,
        "summarize_model": model_name,
        "eval_method": "binary",
        "eval_threshold": 0.7,
        "max_docs": 15,
        "compression_method": "llm_lingua",
        "compress_rate": 0.33,
        # Given as a list, the format the application expects
        "force_tokens": ["."],
        "eval_model": model_name,
        "thread_id": "3",
    }

    # Attach the Neo4j graph object; any failure is shown to the user
    try:
        settings["graph"] = Neo4jGraph(
            url=NEO4J_URI,
            username=NEO4J_USERNAME,
            password=NEO4J_PASSWORD,
        )
    except Exception as exc:
        st.error(f"Error connecting to Neo4j: {exc}")
        return None

    return {"configurable": settings}
68
+
69
def _print_latest_messages(graph, graph_input, config):
    """Stream ``graph`` once and pretty-print the newest message of each event."""
    for event in graph.stream(graph_input, config, stream_mode="values"):
        if "messages" in event:
            event["messages"][-1].pretty_print()


def generate_key_issues(user_query):
    """Generate key issues for ``user_query`` from the Neo4j data.

    The planner graph is streamed three times: once with the initial prompt
    to produce a plan, once to retrieve documents, and once more (after
    marking human validation as done and requesting the ``"summarize"``
    processing step) to process the documents. Progress is reported through
    Streamlit placeholders created here.

    Args:
        user_query: The question entered by the user.

    Returns:
        A ``(final_result, valid_docs)`` tuple. ``final_result`` is the
        content of the last message in the final graph state and
        ``valid_docs`` is the state's ``'valid_docs'`` entry (``[]`` if
        absent). When the configuration cannot be loaded, or the final state
        holds no messages, ``(None, [])`` is returned so callers can always
        unpack two values.
    """
    # Initialize application with API keys
    init_app(
        openai_key=OPENAI_API_KEY,
        groq_key=GROQ_API_KEY,
        langsmith_key=LANGSMITH_API_KEY
    )

    # Load configuration with custom parameters
    config = load_config()
    if not config:
        # Same shape as every other return: the caller unpacks two values,
        # so a bare None here would raise TypeError instead of showing an error.
        return None, []

    # Create status containers
    plan_status = st.empty()
    plan_display = st.empty()
    retrieval_status = st.empty()
    processing_status = st.empty()

    # Build planner graph
    plan_status.info("Building planner graph...")
    graph = build_planner_graph(memory, config["configurable"])

    # Execute initial prompt generation
    plan_status.info(f"Generating plan for query: {user_query}")
    _print_latest_messages(graph, get_initial_prompt(config, user_query), config)

    # Get the state with the generated plan and number its steps from 1
    plan = graph.get_state(config).values['store_plan']
    steps = list(range(1, len(plan) + 1))
    plan_df = pd.DataFrame({'Plan steps': steps, 'Description': plan})

    # Display the plan
    plan_status.success("Plan generation complete!")
    plan_display.dataframe(plan_df, use_container_width=True)

    # Continue with plan execution for document retrieval
    # (a None input continues from the state saved for this config)
    retrieval_status.info("Retrieving documents...")
    _print_latest_messages(graph, None, config)

    # Get updated state after document retrieval
    snapshot = graph.get_state(config)
    doc_count = len(snapshot.values.get('valid_docs', []))
    retrieval_status.success(f"Retrieved {doc_count} documents")

    # Proceed to document processing
    processing_status.info("Processing documents...")
    process_steps = ["summarize"]  # Using summarize as default processing step

    # Update state to indicate human validation is complete and specify processing steps
    graph.update_state(
        config,
        {'human_validated': True, 'process_steps': process_steps},
        as_node="human_validation",
    )

    # Continue execution with document processing
    _print_latest_messages(graph, None, config)

    # Get final state after processing
    final_values = graph.get_state(config).values
    processing_status.success("Document processing complete!")

    # A missing or empty message list both mean there is no result to report
    messages = final_values.get("messages")
    if messages:
        return messages[-1].content, final_values.get('valid_docs', [])

    return None, []
145
+
146
# Page title and short description
st.title("Key Issue Generator")
st.write("Generate key issues from a Neo4j knowledge graph using advanced language models.")

# Probe the database on every script run and report the outcome in the sidebar
connectivity_status = verify_neo4j_connectivity()
st.sidebar.header("Database Status")
connection_failed = "Error" in str(connectivity_status)
if connection_failed:
    st.sidebar.error(f"Database connection issue: {connectivity_status}")
else:
    st.sidebar.success("Connected to Neo4j database")

# Query input, pre-filled with an example question
st.header("Enter Your Query")
user_query = st.text_area(
    "What would you like to explore?",
    "What are the main challenges in AI adoption for healthcare systems?",
    height=100,
)

# Run the pipeline when the button is pressed
if st.button("Generate Key Issues", type="primary"):
    required_settings = (OPENAI_API_KEY, GROQ_API_KEY, LANGSMITH_API_KEY, NEO4J_PASSWORD)
    if all(required_settings):
        with st.spinner("Processing your query..."):
            start_time = time.time()
            final_result, valid_docs = generate_key_issues(user_query)
            end_time = time.time()

        if final_result:
            # Wall-clock duration of the whole generation, shown in the sidebar
            elapsed_seconds = round(end_time - start_time, 2)
            st.sidebar.info(f"Total execution time: {elapsed_seconds} seconds")

            # Generated answer
            st.header("Generated Key Issues")
            st.markdown(final_result)

            # Let the user save the answer as a text file
            st.download_button(
                label="Download Results",
                data=final_result,
                file_name="key_issues_results.txt",
                mime="text/plain",
            )

            # Retrieved documents, one section per document, collapsed by default
            if valid_docs:
                with st.expander("View Retrieved Documents"):
                    for position, doc in enumerate(valid_docs, start=1):
                        st.markdown(f"### Document {position}")
                        for key in doc:
                            st.markdown(f"**{key}**: {doc[key]}")
                        st.divider()
        else:
            st.error("An error occurred during processing. Please check the logs for details.")
    else:
        st.error("Required API keys or database credentials are missing. Please check your environment variables.")
200
+
201
# "About" panel in the sidebar describing what the app does
about_text = """
This application uses advanced language models to analyze a Neo4j knowledge graph and generate key issues
based on your query. The process involves:

1. Creating a plan based on your query
2. Retrieving relevant documents from the database
3. Processing and summarizing the information
4. Generating a comprehensive response
"""
st.sidebar.header("About")
st.sidebar.info(about_text)
config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "main_llm": "deepseek-r1-distill-llama-70b",
3
+ "plan_method": "generation",
4
+ "use_detailed_query": false,
5
+ "cypher_gen_method": "guided",
6
+ "validate_cypher": false,
7
+ "summarize_model": "deepseek-r1-distill-llama-70b",
8
+ "eval_method": "binary",
9
+ "eval_threshold":0.7,
10
+ "max_docs":15,
11
+ "compression_method":"llm_lingua",
12
+ "compress_rate":0.33,
13
+ "force_tokens":".",
14
+ "eval_model":"deepseek-r1-distill-llama-70b",
15
+ "graph": {"address": "neo4j+s://4985272f.databases.neo4j.io", "username":"neo4j", "password":"***"},
16
+ "openai_api_key": "***",
17
+ "groq_api_key":"***",
18
+ "langsmith_api_key":"***"
19
+ }
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain-community==0.3.0
2
+ sentence-transformers==3.1.0
3
+ openpyxl==3.1.5
4
+ langchain-groq==0.2.0
5
+ langchain-openai==0.2.0
6
+ langchain-core==0.3.1
7
+ llmlingua==0.2.2
8
+ langgraph>=0.2.4
9
+ langgraph-checkpoint-sqlite==1.0.3
10
+ langsmith==0.1.123
11
+ faiss-cpu==1.8.0.post1
12
+ neo4j==5.24.0
13
+ gradio==4.44.0
14
+ streamlit==1.31.0
15
+ pandas==2.1.3
16
+ pydantic==2.9.2