adrienbrdne committed · verified
Commit aacc458 · 1 Parent(s): 42c00ab

Update ki_gen/data_retriever.py

Files changed (1):
  1. ki_gen/data_retriever.py +182 -121
ki_gen/data_retriever.py CHANGED
@@ -6,7 +6,12 @@ import time
 from random import shuffle, sample
 from langgraph.checkpoint.sqlite import SqliteSaver
 
-from langchain_groq import ChatGroq
+# Remove ChatGroq import
+# from langchain_groq import ChatGroq
+# Add ChatGoogleGenerativeAI import
+from langchain_google_genai import ChatGoogleGenerativeAI
+import os # Add os import
+
 from langchain_openai import ChatOpenAI
 from langchain_core.messages import HumanMessage
 from langchain_community.graphs import Neo4jGraph
@@ -15,7 +20,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.pydantic_v1 import Field
 from pydantic import BaseModel
-from langchain_groq import ChatGroq
+
 
 from langgraph.graph import StateGraph
 
@@ -28,11 +33,11 @@ from ki_gen.prompts import (
     SCORE_GRADER_PROMPT,
     RELEVANT_CONCEPTS_PROMPT,
 )
-from ki_gen.utils import ConfigSchema, DocRetrieverState, get_model, format_doc
-
-
+# Import get_model which now handles Gemini
+from ki_gen.utils import ConfigSchema, DocRetrieverState, get_model, format_doc
 
 
+# ... (extract_cypher remains the same)
 def extract_cypher(text: str) -> str:
     """Extract Cypher code from a text.
 
@@ -55,33 +60,28 @@ def extract_cypher(text: str) -> str:
         text
     ]
 
-def get_cypher_gen_chain(model: str = "deepseek-r1-distill-llama-70b"):
+# Update default model and use get_model
+def get_cypher_gen_chain(model: str = "gemini-2.0-flash"):
     """
     Returns cypher gen chain using specified model for generation
     This is used when the 'auto' cypher generation method has been configured
     """
-
-    if model=="openai":
-        llm_cypher_gen = ChatOpenAI(model='gpt-4o', base_url="https://llm.synapse.thalescloud.io/")
-    else:
-        llm_cypher_gen = ChatGroq(model = "deepseek-r1-distill-llama-70b")
+    llm_cypher_gen = get_model(model)
     cypher_gen_chain = CYPHER_GENERATION_PROMPT | llm_cypher_gen | StrOutputParser() | extract_cypher
     return cypher_gen_chain
 
-def get_concept_selection_chain(model: str = "deepseek-r1-distill-llama-70b"):
+# Update default model and use get_model
+def get_concept_selection_chain(model: str = "gemini-2.0-flash"):
     """
     Returns a chain to select the most relevant topic using specified model for generation.
     This is used when the 'guided' cypher generation method has been configured
     """
-
-    if model == "openai":
-        llm_topic_selection = ChatOpenAI(model='gpt-4o', base_url="https://llm.synapse.thalescloud.io/")
-    else:
-        llm_topic_selection = ChatGroq(model="deepseek-r1-distill-llama-70b")
+    llm_topic_selection = get_model(model)
     print(f"FOUND LLM TOPIC SELECTION FOR THE CONCEPT SELECTION PROMPT : {llm_topic_selection}")
     topic_selection_chain = CONCEPT_SELECTION_PROMPT | llm_topic_selection | StrOutputParser()
     return topic_selection_chain
 
+# ... (get_concepts remains the same)
 def get_concepts(graph: Neo4jGraph):
     concept_cypher = "MATCH (c:Concept) return c"
     if isinstance(graph, Neo4jGraph):
@@ -93,37 +93,34 @@ def get_concepts(graph: Neo4jGraph):
     concepts_name = [concept['c']['name'] for concept in concepts]
     return concepts_name
 
+
+# Update to use get_model, remove Groq error handling
 def get_related_concepts(graph: Neo4jGraph, question: str):
     concepts = get_concepts(graph)
-    llm = get_model()
+    # Use get_model
+    llm = get_model()
     print(f"this is the llm variable : {llm}")
     def parse_answer(llm_answer : str):
         try:
             print(f"This the llm_answer : {llm_answer}")
+            # Adjust parsing if Gemini output format differs
             return re.split("\n(?:\d)+\.\s", llm_answer.split("Concepts:")[1])[1:]
-        except:
-            return "No concept"
+        except Exception as e:
+            print(f"Error parsing LLM concept answer: {e}")
+            return [] # Return empty list on parsing error
     related_concepts_chain = RELEVANT_CONCEPTS_PROMPT | llm | StrOutputParser() | parse_answer
 
     print(f"This is the question of the user : {question}")
     print(f"This is the concepts of the user : {concepts}")
 
-
-    #groq.APIStatusError: Error code: 413 - {'error': {'message': 'Request too large for model `deepseek-r1-distill-llama-70b` in organization `org_01j6xywkndffv96m3wgh81jm49` on tokens per minute
-    # (TPM): Limit 5000, Requested 17099, please reduce your message size and try again. Visit https://console.groq.com/docs/rate-limits for more information.',
-    # 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
-
+    # Remove specific Groq error handling block
     try:
         related_concepts_raw = related_concepts_chain.invoke({"user_query" : question, "concepts" : '\n'.join(concepts)})
         print(f"related_concepts_raw : {related_concepts_raw}")
     except Exception as e:
-        if e.status_code == 413:
-            msg = e.body["error"]["message"]
-            print(f"question is : {question}")
-            print(type(question))
-            error_question = ["user_query", question]
-            related_concepts_raw = error_concept_groq(msg,concepts,related_concepts_chain,error_question)
-        pass
+        # Add generic error handling/logging for Gemini if needed
+        print(f"Error invoking related concepts chain: {e}")
+        related_concepts_raw = [] # Assign empty list on error
 
     # We clean up the list we received from the LLM in case there were some hallucinations
     related_concepts_cleaned = []
@@ -142,6 +139,7 @@ def get_related_concepts(graph: Neo4jGraph, question: str):
     # TODO : Add concepts found via similarity search
     return related_concepts_cleaned
 
+# ... (build_concept_string, get_global_concepts remain the same)
 def build_concept_string(graph: Neo4jGraph, concept_list: list[str]):
     concept_string = ""
     for concept in concept_list:
@@ -163,6 +161,7 @@ def get_global_concepts(graph: Neo4jGraph):
     concepts_name = [concept['gc']['name'] for concept in concepts]
     return concepts_name
 
+# Update concept selection error handling
 def generate_cypher(state: DocRetrieverState, config: ConfigSchema):
     """
     The node where the cypher is generated
@@ -180,30 +179,40 @@ def generate_cypher(state: DocRetrieverState, config: ConfigSchema):
         "concepts": related_concepts
     })
 
-    try :
-
+    # Remove specific Groq error handling block
+    try:
         if config["configurable"].get("cypher_gen_method") == 'guided':
             concept_selection_chain = get_concept_selection_chain()
             print(f"Concept selection chain is : {concept_selection_chain}")
             selected_topic = concept_selection_chain.invoke({"question" : question, "concepts": get_concepts(graph)})
             print(f"Selected topic are : {selected_topic}")
-
-    except Exception as e:
-        error_question = ["question", question]
-        selected_topic = error_concept_groq(e.body["error"]["message"],get_concepts(graph),concept_selection_chain,error_question)
-        pass
-
-    if config["configurable"].get("cypher_gen_method") == 'guided':
             cyphers = [generate_cypher_from_topic(selected_topic, state['current_plan_step'])]
             print(f"Cyphers are : {cyphers}")
 
+    except Exception as e:
+        # Add generic error handling/logging for Gemini if needed
+        print(f"Error during guided cypher generation: {e}")
+        cyphers = [] # Assign empty list on error
+
     if config["configurable"].get("validate_cypher"):
-        corrector_schema = [Schema(el["start"], el["type"], el["end"]) for el in graph.structured_schema.get("relationships")]
-        cypher_corrector = CypherQueryCorrector(corrector_schema)
-        cyphers = [cypher_corrector(cypher) for cypher in cyphers]
+        # Ensure graph schema is correctly fetched if needed
+        if graph and hasattr(graph, 'structured_schema'):
+            corrector_schema = [Schema(el["start"], el["type"], el["end"]) for el in graph.structured_schema.get("relationships", [])]
+            cypher_corrector = CypherQueryCorrector(corrector_schema)
+            # Apply corrector only if cyphers were generated
+            if cyphers:
+                try:
+                    cyphers = [cypher_corrector(cypher) for cypher in cyphers]
+                except Exception as corr_e:
+                    print(f"Error during cypher correction: {corr_e}")
+                    # Decide how to handle correction errors, maybe keep original cyphers
+        else:
+            print("Warning: Cypher validation skipped, graph or schema unavailable.")
 
     return {"cyphers" : cyphers}
 
+# ... (generate_cypher_from_topic, get_docs remain the same)
 def generate_cypher_from_topic(selected_concept: str, plan_step: int):
     """
     Helper function used when the 'guided' cypher generation method has been configured
@@ -226,36 +235,54 @@ def get_docs(state:DocRetrieverState, config:ConfigSchema):
     """
     graph = config["configurable"].get("graph")
     output = []
-    if graph is not None:
+    if graph is not None and state.get("cyphers"): # Check if cyphers exist
         for cypher in state["cyphers"]:
             try:
                 output = graph.query(cypher)
-                break
+                # Assuming the first successful query is sufficient
+                if output:
+                    break
             except Exception as e:
-                print("Failed to retrieve docs : {e}")
+                print(f"Failed to retrieve docs with cypher '{cypher}': {e}")
+                # Continue to try next cypher if one fails
 
     # Clean up the docs we received as there may be duplicates depending on the cypher query
     all_docs = []
     for doc in output:
         unwinded_doc = {}
-        for key in doc:
-            if isinstance(doc[key], dict):
-                all_docs.append(doc[key])
-            else:
-                unwinded_doc.update({key: doc[key]})
-        if unwinded_doc:
-            all_docs.append(unwinded_doc)
-
+        # Ensure doc is a dictionary before iterating
+        if isinstance(doc, dict):
+            for key in doc:
+                if isinstance(doc[key], dict):
+                    # If a value is a dict, treat it as a separate document
+                    all_docs.append(doc[key])
+                else:
+                    unwinded_doc.update({key: doc[key]})
+            # Add the unwinded parts if any keys were not dictionaries
+            if unwinded_doc:
+                all_docs.append(unwinded_doc)
 
     filtered_docs = []
-    for doc in all_docs:
-        if doc not in filtered_docs:
-            filtered_docs.append(doc)
-
-    return {"docs": filtered_docs}
+    seen_docs = set() # Use a set for faster duplicate checking based on a unique identifier
 
+    for doc in all_docs:
+        # Create a tuple of items to check for duplicates, assuming dicts are hashable
+        # If dicts contain unhashable types (like lists), convert them to strings or use a primary key
+        try:
+            doc_tuple = tuple(sorted(doc.items()))
+            if doc_tuple not in seen_docs:
+                filtered_docs.append(doc)
+                seen_docs.add(doc_tuple)
+        except TypeError:
+            # Handle cases where doc items are not hashable (e.g., contain lists/dicts)
+            # Fallback: convert doc to string for uniqueness check (less reliable)
+            doc_str = str(sorted(doc.items()))
+            if doc_str not in seen_docs:
+                filtered_docs.append(doc)
+                seen_docs.add(doc_str)
 
+    return {"docs": filtered_docs}
 
 
 # Data model
@@ -266,22 +293,25 @@ class GradeDocumentsBinary(BaseModel):
         description="Documents are relevant to the question, 'yes' or 'no'"
     )
 
-# LLM with function call
-# llm_grader_binary = ChatGroq(model="deepseek-r1-distill-llama-70b", temperature=0)
-
-def get_binary_grader(model="deepseek-r1-distill-llama-70b"):
+# Update default model and use get_model
+def get_binary_grader(model="gemini-2.0-flash"):
     """
     Returns a binary grader to evaluate relevance of documents using specified model for generation
    This is used when the 'binary' evaluation method has been configured
     """
-
-    if model == "gpt-4o":
-        llm_grader_binary = ChatOpenAI(model='gpt-4o', base_url="https://llm.synapse.thalescloud.io/", temperature=0)
-    else:
-        llm_grader_binary = ChatGroq(model="deepseek-r1-distill-llama-70b", temperature=0)
-    structured_llm_grader_binary = llm_grader_binary.with_structured_output(GradeDocumentsBinary)
-    retrieval_grader_binary = BINARY_GRADER_PROMPT | structured_llm_grader_binary
+    llm_grader_binary = get_model(model)
+    # Check if the model supports structured output, otherwise use standard invocation
+    try:
+        # Attempt to get structured output
+        structured_llm_grader_binary = llm_grader_binary.with_structured_output(GradeDocumentsBinary)
+        retrieval_grader_binary = BINARY_GRADER_PROMPT | structured_llm_grader_binary
+    except NotImplementedError:
+        print(f"Warning: Model {model} may not support structured output directly for binary grading. Falling back.")
+        # Fallback: parse the string output if structured output fails
+        from langchain_core.output_parsers import SimpleJsonOutputParser
+        # You might need to adjust the prompt to explicitly ask for JSON
+        retrieval_grader_binary = BINARY_GRADER_PROMPT | llm_grader_binary | SimpleJsonOutputParser() # Or StrOutputParser and manual parsing
+
     return retrieval_grader_binary
 
 
@@ -292,41 +322,58 @@ class GradeDocumentsScore(BaseModel):
         description="Documents are relevant to the question, score between 0 (completely irrelevant) and 1 (perfectly relevant)"
     )
 
-def get_score_grader(model="deepseek-r1-distill-llama-70b"):
+# Update default model and use get_model
+def get_score_grader(model="gemini-2.0-flash"):
     """
     Returns a score grader to evaluate relevance of documents using specified model for generation
     This is used when the 'score' evaluation method has been configured
     """
-    if model == "gpt-4o":
-        llm_grader_score = ChatOpenAI(model='gpt-4o', base_url="https://llm.synapse.thalescloud.io/", temperature=0)
-    else:
-        llm_grader_score = ChatGroq(model="deepseek-r1-distill-llama-70b", temperature = 0)
-    structured_llm_grader_score = llm_grader_score.with_structured_output(GradeDocumentsScore)
-    retrieval_grader_score = SCORE_GRADER_PROMPT | structured_llm_grader_score
-    return retrieval_grader_score
+    llm_grader_score = get_model(model)
+    # Check if the model supports structured output
+    try:
+        structured_llm_grader_score = llm_grader_score.with_structured_output(GradeDocumentsScore)
+        retrieval_grader_score = SCORE_GRADER_PROMPT | structured_llm_grader_score
+    except NotImplementedError:
+        print(f"Warning: Model {model} may not support structured output directly for score grading. Falling back.")
+        # Fallback: parse the string output if structured output fails
+        from langchain_core.output_parsers import SimpleJsonOutputParser
+        # Adjust prompt if needed
+        retrieval_grader_score = SCORE_GRADER_PROMPT | llm_grader_score | SimpleJsonOutputParser() # Or StrOutputParser and manual parsing
 
+    return retrieval_grader_score
 
-def eval_doc(doc, query, method="binary", threshold=0.7, eval_model="deepseek-r1-distill-llama-70b"):
+# Update default model
+def eval_doc(doc, query, method="binary", threshold=0.7, eval_model="gemini-2.0-flash"):
     '''
     doc : the document to evaluate
     query : the query to which to doc shoud be relevant
     method : "binary" or "score"
     threshold : for "score" method, score above which a doc is considered relevant
     '''
-    if method == "binary":
-        retrieval_grader_binary = get_binary_grader(model=eval_model)
-        return 1 if (retrieval_grader_binary.invoke({"question": query, "document":doc}).binary_score == 'yes') else 0
-    elif method == "score":
-        retrieval_grader_score = get_score_grader(model=eval_model)
-        score = retrieval_grader_score.invoke({"query": query, "document":doc}).score or None
-        if score is not None:
-            return score if score >= threshold else 0
-        else:
-            # Couldn't parse score, marking document as relevant by default
-            return 1
-    else:
-        raise ValueError("Invalid method")
+    try:
+        if method == "binary":
+            retrieval_grader_binary = get_binary_grader(model=eval_model)
+            result = retrieval_grader_binary.invoke({"question": query, "document":doc})
+            # Handle both structured and parsed output
+            binary_score = result.binary_score if isinstance(result, GradeDocumentsBinary) else result.get("binary_score", "no")
+            return 1 if (binary_score.lower() == 'yes') else 0
+        elif method == "score":
+            retrieval_grader_score = get_score_grader(model=eval_model)
+            result = retrieval_grader_score.invoke({"query": query, "document":doc})
+            # Handle both structured and parsed output
+            score = result.score if isinstance(result, GradeDocumentsScore) else result.get("score")
+            if score is not None:
+                return score if float(score) >= threshold else 0
+            else:
+                print("Warning: Couldn't parse score, marking document as relevant by default.")
+                return 1 # Default to relevant if score parsing fails
+        else:
+            raise ValueError("Invalid method")
+    except Exception as e:
+        print(f"Error evaluating document: {e}")
+        return 0 # Default to irrelevant on error
 
+# Update default model
 def eval_docs(state: DocRetrieverState, config: ConfigSchema):
     """
     This node performs evaluation of the retrieved docs and
@@ -334,29 +381,63 @@ def eval_docs(state: DocRetrieverState, config: ConfigSchema):
 
     eval_method = config["configurable"].get("eval_method") or "binary"
     MAX_DOCS = config["configurable"].get("max_docs") or 15
+    # Update default model name
+    eval_model_name = config["configurable"].get("eval_model") or "gemini-2.0-flash"
     valid_doc_scores = []
 
-    for doc in sample(state["docs"], min(25, len(state["docs"]))):
+    # Ensure 'docs' exists and is a list
+    docs_to_evaluate = state.get("docs", [])
+    if not isinstance(docs_to_evaluate, list):
+        print("Warning: 'docs' is not a list, skipping evaluation.")
+        docs_to_evaluate = []
+
+    # Sample safely
+    sample_size = min(25, len(docs_to_evaluate))
+    sampled_docs = sample(docs_to_evaluate, sample_size) if sample_size > 0 else []
+
+    for doc in sampled_docs:
+        # Ensure doc is not None before formatting
+        if doc is None:
+            print("Warning: Encountered None document during evaluation, skipping.")
+            continue
+
+        formatted_doc_str = format_doc(doc)
+        # Add basic check for empty formatted doc
+        if not formatted_doc_str.strip():
+            print(f"Warning: Skipping empty formatted document: {doc}")
+            continue
+
         score = eval_doc(
-            doc=format_doc(doc),
+            doc=formatted_doc_str,
             query=state["query"],
             method=eval_method,
            threshold=config["configurable"].get("eval_threshold") or 0.7,
-            eval_model = config["configurable"].get("eval_model") or "deepseek-r1-distill-llama-70b"
+            eval_model=eval_model_name # Pass the eval_model name
        )
-        if score:
-            valid_doc_scores.append((doc, score))
+        # Ensure score is numeric before appending
+        if isinstance(score, (int, float)):
+            if score > 0: # Only add if relevant (score > 0 or binary score == 1)
+                valid_doc_scores.append((doc, score))
+        else:
+            print(f"Warning: Received non-numeric score ({score}) for doc {doc}, skipping.")
 
     if eval_method == 'score':
         # Get at most MAX_DOCS items with the highest score if score method was used
-        valid_docs = sorted(valid_doc_scores, key=lambda x: x[1])
-        valid_docs = [valid_doc[0] for valid_doc in valid_docs[:MAX_DOCS]]
+        valid_docs_sorted = sorted(valid_doc_scores, key=lambda x: x[1], reverse=True) # Sort descending
+        valid_docs = [valid_doc[0] for valid_doc in valid_docs_sorted[:MAX_DOCS]]
    else:
        # Get at mots MAX_DOCS items at random if binary method was used
        shuffle(valid_doc_scores)
        valid_docs = [valid_doc[0] for valid_doc in valid_doc_scores[:MAX_DOCS]]
 
-    return {"valid_docs": valid_docs + (state["valid_docs"] or [])}
+    # Ensure existing valid_docs is a list before concatenating
+    existing_valid_docs = state.get("valid_docs", [])
+    if not isinstance(existing_valid_docs, list):
+        existing_valid_docs = []
+
+    return {"valid_docs": valid_docs + existing_valid_docs}
 
 
 
@@ -382,25 +463,5 @@ def build_data_retriever_graph(memory):
 
     return graph_doc_retriever
 
-def error_concept_groq(msg,concepts,groq,question):
-    try:
-        start = msg.find("Requested") + len("Requested ")
-        end = msg.find(",", start)
-        rate_limit = int(msg[start:end])
-        related_concepts = []
-        i = 0
-        start = 0
-        end = len(concepts) // (rate_limit // 5000 + (1 if rate_limit%4500 != 0 else 0))
-        while (i < rate_limit // 5000):
-            smaller_concepts = concepts[start:end]
-            start = end
-            end = end + len(concepts) // (rate_limit//5000 + (1 if rate_limit%4500 != 0 else 0))
-            res = groq.invoke({question[0] : question[1], "concepts" : '\n'.join(smaller_concepts)})
-            for r in res:
-                related_concepts.append(r)
-            i+=1
-        return related_concepts
-    except Exception as e:
-        if e.status_code == 419:
-            time.sleep(65)
-            error_concept_groq(msg,concepts,groq,question)
+# Remove Groq specific error handling function
+# def error_concept_groq(msg,concepts,groq,question): ...
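
For context, the diff above assumes that ki_gen/utils.get_model maps a model name to a chat model instance and "now handles Gemini" (per the new import comment). That helper lives outside this file and is not part of this commit; the following is a minimal sketch of what it might look like, with the name-based dispatch, the temperature default, and the OpenAI fallback all being assumptions rather than code from this repository.

import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI

def get_model(model: str = "gemini-2.0-flash", temperature: float = 0):
    """Hypothetical helper: return a chat model instance for the given model name."""
    if model.startswith("gemini"):
        # ChatGoogleGenerativeAI reads the GOOGLE_API_KEY environment variable by default
        return ChatGoogleGenerativeAI(model=model, temperature=temperature)
    # Fallback for OpenAI models, mirroring the code paths this commit removes
    return ChatOpenAI(model=model, temperature=temperature)

Under that assumption, the updated chains in data_retriever.py only need the Google credentials to be present in the environment before they are built, e.g. setting GOOGLE_API_KEY and then calling get_cypher_gen_chain(), which now defaults to "gemini-2.0-flash".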