DeepakKolhe1995 committed on
Commit
b30095c
·
verified ·
1 Parent(s): 2b794b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -7
app.py CHANGED
@@ -13,6 +13,7 @@ from langchain_aws import ChatBedrock
13
  from langchain_core.prompts import ChatPromptTemplate
14
  from langchain_core.runnables import RunnablePassthrough
15
  from langchain_core.output_parsers import StrOutputParser
 
16
  import re
17
  import json
18
 
@@ -50,10 +51,15 @@ def load_wikipedia_documents():
50
  @st.cache_resource
51
  def split_documents(_documents):
52
  """Split documents into chunks."""
53
- splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
54
- chunks = splitter.split_documents(_documents)
55
- logger.info(f"Split into {len(chunks)} chunks")
56
- return chunks
 
 
 
 
 
57
 
58
  @st.cache_resource
59
  def initialize_embeddings():
@@ -72,16 +78,42 @@ def initialize_embeddings():
72
 
73
  @st.cache_resource
74
  def store_in_qdrant(_chunks, _embeddings):
75
- """Store document chunks in a hosted Qdrant instance."""
76
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  vector_store = Qdrant.from_documents(
78
  documents=_chunks,
79
  embedding=_embeddings,
80
  url=os.getenv("QDRANT_URL"),
81
  api_key=os.getenv("QDRANT_API_KEY"),
82
- collection_name="wikipedia_chunks"
83
  )
84
- logger.info(f"Stored {len(_chunks)} chunks in Qdrant at {os.getenv('QDRANT_URL')}")
 
 
 
 
 
 
 
 
 
 
 
85
  return vector_store
86
  except Exception as e:
87
  logger.error(f"Error storing in Qdrant: {e}")
 
13
  from langchain_core.prompts import ChatPromptTemplate
14
  from langchain_core.runnables import RunnablePassthrough
15
  from langchain_core.output_parsers import StrOutputParser
16
+ from qdrant_client import QdrantClient
17
  import re
18
  import json
19
 
 
51
  @st.cache_resource
52
  def split_documents(_documents):
53
  """Split documents into chunks."""
54
+ try:
55
+ splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
56
+ chunks = splitter.split_documents(_documents)
57
+ logger.info(f"Split into {len(chunks)} chunks")
58
+ return chunks
59
+ except Exception as e:
60
+ logger.error(f"Error splitting documents: {e}")
61
+ st.error(f"Failed to split documents: {e}")
62
+ return []
63
 
64
  @st.cache_resource
65
  def initialize_embeddings():
 
78
 
79
  @st.cache_resource
80
  def store_in_qdrant(_chunks, _embeddings):
81
+ """Store document chunks in a hosted Qdrant instance after deleting existing collection."""
82
  try:
83
+ # Initialize Qdrant client
84
+ client = QdrantClient(
85
+ url=os.getenv("QDRANT_URL"),
86
+ api_key=os.getenv("QDRANT_API_KEY")
87
+ )
88
+
89
+ # Delete existing collection if it exists
90
+ collection_name = "wikipedia_chunks"
91
+ try:
92
+ client.delete_collection(collection_name)
93
+ logger.info(f"Deleted existing Qdrant collection: {collection_name}")
94
+ except Exception as e:
95
+ logger.warning(f"No existing collection {collection_name} to delete or error: {e}")
96
+
97
+ # Create and populate new collection
98
  vector_store = Qdrant.from_documents(
99
  documents=_chunks,
100
  embedding=_embeddings,
101
  url=os.getenv("QDRANT_URL"),
102
  api_key=os.getenv("QDRANT_API_KEY"),
103
+ collection_name=collection_name
104
  )
105
+
106
+ # Verify storage by checking collection size
107
+ collection_info = client.get_collection(collection_name)
108
+ stored_points = collection_info.points_count
109
+ logger.info(f"Stored {stored_points} chunks in Qdrant at {os.getenv('QDRANT_URL')}")
110
+ if stored_points == 0:
111
+ logger.error("No documents stored in Qdrant collection")
112
+ st.error("No documents stored in Qdrant collection")
113
+ return None
114
+ if stored_points != len(_chunks):
115
+ logger.warning(f"Expected {len(_chunks)} chunks, but stored {stored_points} in Qdrant")
116
+
117
  return vector_store
118
  except Exception as e:
119
  logger.error(f"Error storing in Qdrant: {e}")