Jacqkues committed
Commit d25ee4b · verified · 1 Parent(s): c555a85

Upload 11 files

agent.py ADDED
@@ -0,0 +1,62 @@
+ from services.ai import rank_tables, generate_sql, generate_answer, correct_sql, evaluate_difficulty
+ from services.utils import filter_tables
+ from openai import OpenAI
+ from database import Database
+ from filesource import FileSource
+ import os
+
+ MAX_TABLE = 3
+
+ client = OpenAI(
+     base_url=os.getenv("LLM_ENDPOINT"),
+     api_key=os.getenv("LLM_KEY")
+ )
+
+ def run_agent(database, prompt):
+     retry = 5
+     tables = database.get_tables_array()
+     use_thinking = False
+
+     # When the schema is large, rerank the tables and keep only the most relevant ones.
+     if len(tables) > MAX_TABLE:
+         print(f"using reranking because the number of tables is greater than {MAX_TABLE}")
+         ranked = rank_tables(prompt, tables)
+         tables = filter_tables(0, ranked)[:MAX_TABLE]
+
+     # Enable the model's thinking mode for questions rated as difficult.
+     dif = int(evaluate_difficulty(client, prompt))
+     if dif > 7:
+         print("difficulty is > 7 so we enable thinking mode")
+         use_thinking = True
+
+     sql = generate_sql(client, prompt, tables, use_thinking)
+     nb_try = 0
+     success = False
+     # Run the SQL, asking the LLM to self-correct after each failure.
+     while nb_try < retry and not success:
+         nb_try += 1
+         try:
+             print("trying to run the SQL request")
+             result = database.query(sql)
+             success = True
+         except Exception as e:
+             print(f"Error: {e}")
+             print("Trying to self-correct...")
+             error = f"{type(e).__name__} - {str(e)}"
+             # Thinking mode is enabled for the first correction attempts, then disabled.
+             if nb_try < retry - 2:
+                 sql = correct_sql(client, prompt, sql, tables, error, True)
+             else:
+                 sql = correct_sql(client, prompt, sql, tables, error, False)
+
+     print(sql)
+
+     if success:
+         print(result.to_markdown())
+         return generate_answer(client, sql, prompt, result.to_markdown(), use_thinking)
+
+ # db = Database("mysql://user:password@localhost:3306/Pokemon")
+ # db.connect()
+ # file = FileSource("./Wines.csv")
+ # file.connect()
+ # print(run_agent(file, "What is the quality of the wine with the least alcohol?"))
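
For context, a minimal end-to-end smoke test of run_agent could look like the sketch below (the endpoint URL, key, and Wines.csv file are assumptions; any OpenAI-compatible server works):

    # Hypothetical local test; set the env vars before importing agent.py,
    # since the OpenAI client is created at import time.
    import os
    os.environ["LLM_ENDPOINT"] = "http://localhost:8000/v1"   # assumed server URL
    os.environ["LLM_KEY"] = "super-secret-key-mcp-hackathon"  # assumed key

    from filesource import FileSource
    from agent import run_agent

    src = FileSource("./Wines.csv")  # assumed local CSV
    src.connect()
    for chunk in run_agent(src, "Which wine has the least alcohol?"):
        print(chunk, end="")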
app.py ADDED
@@ -0,0 +1,131 @@
+ import gradio as gr
+ from database import Database
+ from filesource import FileSource
+ from agent import run_agent
+ from services.utils import get_db_scheme_from_uri
+
+ source = None
+
+ def connect_to_file(file):
+     global source
+     try:
+         source = FileSource(file.name)
+         status = source.connect()
+         schema = source._pretify_schema()
+         status = "Connection successful!"
+     except Exception as e:
+         schema = ""
+         status = f"Error: {str(e)}"
+     return schema, status
+
+ def connect_to_database(db_url):
+     global source
+     try:
+         dialect = get_db_scheme_from_uri(db_url)
+         source = Database(db_url, dialect)
+         status = source.connect()
+         schema = source._pretify_schema()
+         status = "Connection successful!"
+     except Exception as e:
+         schema = ""
+         status = f"Error: {str(e)}"
+     return schema, status
+
+ # Add the user message to the chat history.
+ def user(user_message, chat_history):
+     chat_history.append({"role": "user", "content": user_message})
+     return "", chat_history
+
+ # Generate a bot response, streaming the answer chunk by chunk.
+ def bot(chat_history):
+     if source is None:
+         chat_history.append({"role": "assistant", "content": "Please connect to a database before asking a question."})
+         yield chat_history
+     else:
+         answer = run_agent(source, chat_history[-1]['content'])
+         chat_history.append({"role": "assistant", "content": ""})
+         for chunk in answer:
+             chat_history[-1]['content'] += chunk
+             yield chat_history
+
+ # Create the Gradio interface.
+ with gr.Blocks(theme=gr.themes.Default(), css="""
+ .gr-button { margin: 5px; border-radius: 16px; }
+ .gr-textbox, .gr-text-area, .gr-dropdown, .gr-json { border-radius: 8px; }
+ .gr-row { gap: 10px; }
+ .gr-tab { border-radius: 8px; }
+ .status-text { font-size: 0.9em; color: #555; }
+ .gr-json { max-height: 300px; overflow-y: auto; } /* Added scrolling for JSON */
+ """) as demo:
+     gr.Markdown(
+         """
+ # 🤖 MCP DB Answer
+ Your MCP server that lets you talk to any database.
+
+ Powered by Ibis, it supports PostgreSQL, SQLite, MySQL, MSSQL, ClickHouse, BigQuery, and many others.
+
+ It also supports .csv and .parquet files.
+ """,
+         elem_classes=["header"]
+     )
+
+     with gr.Column(scale=3):
+         with gr.Tabs():
+             with gr.TabItem("💬 Chat"):
+                 with gr.Group():
+                     main_chat_disp = gr.Chatbot(
+                         label=None, height=600,
+                         avatar_images=(None, "https://huggingface.co/spaces/Space-Share/bucket/resolve/main/images/pfp.webp"),
+                         show_copy_button=True, render_markdown=True, sanitize_html=True, type='messages'
+                     )
+                 with gr.Row(variant="compact"):
+                     user_msg_tb = gr.Textbox(
+                         show_label=False, placeholder="Talk with your data...",
+                         scale=7, lines=1, max_lines=3
+                     )
+                     send_btn = gr.Button("Send", variant="primary", scale=1, min_width=100)
+             with gr.TabItem("Config"):
+                 with gr.Row():
+                     # Left column for database configuration.
+                     with gr.Column(scale=1):
+                         gr.Markdown("## Database Configuration")
+                         # Textbox for entering the database URL.
+                         db_url_tb = gr.Textbox(
+                             show_label=True, label="Database URL", placeholder="Enter the URL to connect to the database..."
+                         )
+                         # Button to connect to the database.
+                         connect_btn = gr.Button("Connect", variant="primary")
+
+                         file_uploader = gr.File(
+                             label="Upload File", file_types=[".csv", ".parquet", ".xls", ".xlsx"]
+                         )
+                         # Button to load the uploaded file.
+                         load_btn = gr.Button("Load", variant="primary")
+
+                     # Right column for displaying the database schema and status message.
+                     with gr.Column(scale=3):
+                         gr.Markdown("## Database Schema")
+                         # Textarea to display the database schema.
+                         schema_ta = gr.TextArea(
+                             show_label=False, placeholder="Database schema will be displayed here...",
+                             lines=20, max_lines=50, interactive=False
+                         )
+                         # Textbox to display the status message.
+                         status_tb = gr.Textbox(
+                             show_label=False, placeholder="Status message will be displayed here...",
+                             lines=1, max_lines=1, interactive=False, elem_classes=["status-text"]
+                         )
+     connect_btn.click(fn=connect_to_database, inputs=db_url_tb, outputs=[schema_ta, status_tb])
+     load_btn.click(fn=connect_to_file, inputs=file_uploader, outputs=[schema_ta, status_tb])
+     send_btn.click(fn=user, inputs=[user_msg_tb, main_chat_disp], outputs=[user_msg_tb, main_chat_disp], queue=False).then(
+         fn=bot, inputs=main_chat_disp, outputs=main_chat_disp
+     )
+
+ if __name__ == "__main__":
+     demo.launch(mcp_server=True)
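
With mcp_server=True, Gradio also exposes the app's functions as MCP tools over SSE. A minimal client sketch using the `mcp` package pinned in requirements.txt (the localhost URL is an assumption; Gradio prints the actual MCP endpoint at startup):

    import asyncio
    from mcp import ClientSession
    from mcp.client.sse import sse_client

    async def main():
        # Assumed endpoint path for a locally running app.
        async with sse_client("http://localhost:7860/gradio_api/mcp/sse") as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                tools = await session.list_tools()
                print([t.name for t in tools.tools])  # the exposed tools

    asyncio.run(main())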
database.py ADDED
@@ -0,0 +1,107 @@
+ import ibis
+ import sqlglot
+ from sqlglot import optimizer
+
+ class Database:
+
+     def __init__(self, connection_url, engine_dialect="mysql") -> None:
+         self._connect_url = connection_url
+         self.engine_dialect = engine_dialect
+         self._tables_docs = {}
+         self._table_exemple = {}
+
+     def connect(self):
+         try:
+             self._con = ibis.connect(self._connect_url)
+             return f"✅ Connection to {self._connect_url} OK!"
+         except Exception as e:
+             raise e
+
+     def _optimize_query(self, sql, schema):
+         # Qualify and simplify the parsed query against the known schema.
+         optimized_expression = optimizer.optimize(sql, schema=schema, dialect=self.engine_dialect)
+         optimized_sql = optimized_expression.sql(dialect=self.engine_dialect)
+         return optimized_sql
+
+     def _pretify_table(self, table, columns):
+         out = ""
+         if table in self._tables_docs:
+             out += f"## Documentation \n{self._tables_docs[table]}\n"
+         if table in self._table_exemple:
+             out += f"## Example \n{self._table_exemple[table]}"
+         out += f"Table ({table}) with {len(columns)} fields:\n"
+         for field in columns:
+             out += f"\t{field} of type: {columns[field]}\n"
+         return out
+
+     def add_table_documentation(self, table_name, documentation):
+         self._tables_docs[table_name] = documentation
+
+     def add_table_exemple(self, table_name, exemples):
+         self._table_exemple[table_name] = exemples
+
+     def get_tables_array(self):
+         schema = self._build_schema()
+         array = []
+         for table in schema:
+             array.append(self._pretify_table(table, schema[table]))
+         return array
+
+     def _pretify_schema(self):
+         out = ""
+         schema = self._build_schema()
+         for table in schema:
+             out += self._pretify_table(table, schema[table])
+             out += "\n"
+         return out
+
+     def _build_schema(self):
+         tables = self._con.list_tables()
+         schema = {}
+         for table_name in tables:
+             try:
+                 table_expr = self._con.table(table_name)
+                 table_schema = table_expr.schema()
+                 columns = {col: str(dtype) for col, dtype in table_schema.items()}
+                 schema[table_name] = columns
+             except Exception as e:
+                 print(f"Warning: Could not retrieve schema for table '{table_name}': {e}")
+         return schema
+
+     def query(self, sql_query):
+         schema = self._build_schema()
+         print(sql_query)
+         # A parse error is fatal: the SQL is not valid in the engine's dialect.
+         expression = sqlglot.parse_one(sql_query, read=self.engine_dialect)
+
+         # Try to optimize; fall back to the unoptimized query on failure.
+         try:
+             final_query = self._optimize_query(expression, schema)
+         except Exception:
+             final_query = expression.sql(dialect=self.engine_dialect)
+
+         expr = self._con.sql(final_query, dialect=self.engine_dialect)
+         return expr.execute()
+
+ # db = Database("mysql://user:password@localhost:3306/Pokemon")
+ # db.connect()
+ # schema = db._build_schema()
+ # db.add_table_documentation("Defense", "This is a super table")
+ # db.add_table_exemple("Defense", "caca")
+ # db.add_table_exemple("Joueur", "ezofkzrfp")
+ # for table in schema.keys():
+ #     print(db._pretify_table(table, schema[table]))
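
The _optimize_query step leans on sqlglot's schema-aware optimizer. A standalone sketch of what it does (the table and columns are made up for illustration):

    import sqlglot
    from sqlglot import optimizer

    schema = {"wines": {"id": "int", "alcohol": "double", "quality": "int"}}
    expr = sqlglot.parse_one("SELECT quality FROM wines ORDER BY alcohol LIMIT 1", read="duckdb")
    # optimize() qualifies column references and applies simplification rules.
    print(optimizer.optimize(expr, schema=schema, dialect="duckdb").sql(dialect="duckdb"))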
filesource.py ADDED
@@ -0,0 +1,126 @@
+ import ibis
+ import sqlglot
+ from sqlglot import optimizer
+ from services.utils import extract_filename
+
+ class FileSource:
+
+     def __init__(self, file_path, file_type="csv") -> None:
+         self.file_path = file_path
+         self.file_type = file_type.lower()
+         self._tables_docs = {}
+         self._table_exemple = {}
+         self.engine_dialect = "duckdb"
+
+     def connect(self):
+         try:
+             # Load the file into an in-memory DuckDB database, named after the file.
+             self._con = ibis.connect("duckdb://")
+             name = extract_filename(self.file_path)
+             table, ext = name.rsplit(".", 1)
+             if ext == "csv":
+                 self._table = self._con.read_csv(self.file_path, table_name=table)
+             elif ext == "parquet":
+                 self._table = self._con.read_parquet(self.file_path, table_name=table)
+             else:
+                 raise ValueError(f"Unsupported file type: {ext}")
+             self._schema = self._table.schema()
+             return f"✅ Connection to {name} OK!"
+         except Exception as e:
+             raise e
+
+     def _optimize_query(self, sql, schema):
+         optimized_expression = optimizer.optimize(sql, schema=schema, dialect=self.engine_dialect)
+         optimized_sql = optimized_expression.sql(dialect=self.engine_dialect)
+         return optimized_sql
+
+     def _pretify_table(self, table, columns):
+         out = ""
+         if table in self._tables_docs:
+             out += f"## Documentation \n{self._tables_docs[table]}\n"
+         if table in self._table_exemple:
+             out += f"## Example \n{self._table_exemple[table]}"
+         out += f"Table ({table}) with {len(columns)} fields:\n"
+         for field in columns:
+             out += f"\t{field} of type: {columns[field]}\n"
+         return out
+
+     def add_table_documentation(self, table_name, documentation):
+         self._tables_docs[table_name] = documentation
+
+     def add_table_exemple(self, table_name, exemples):
+         self._table_exemple[table_name] = exemples
+
+     def get_tables_array(self):
+         schema = self._build_schema()
+         array = []
+         for table in schema:
+             array.append(self._pretify_table(table, schema[table]))
+         return array
+
+     def _pretify_schema(self):
+         out = ""
+         schema = self._build_schema()
+         for table in schema:
+             out += self._pretify_table(table, schema[table])
+             out += "\n"
+         return out
+
+     def _build_schema(self):
+         tables = self._con.list_tables()
+         schema = {}
+         for table_name in tables:
+             try:
+                 table_expr = self._con.table(table_name)
+                 table_schema = table_expr.schema()
+                 columns = {col: str(dtype) for col, dtype in table_schema.items()}
+                 schema[table_name] = columns
+             except Exception as e:
+                 print(f"Warning: Could not retrieve schema for table '{table_name}': {e}")
+         return schema
+
+     def query(self, sql_query):
+         schema = self._build_schema()
+         print(sql_query)
+         # A parse error is fatal: the SQL is not valid DuckDB SQL.
+         expression = sqlglot.parse_one(sql_query, read=self.engine_dialect)
+
+         # Try to optimize; fall back to the unoptimized query on failure.
+         try:
+             final_query = self._optimize_query(expression, schema)
+         except Exception:
+             final_query = expression.sql(dialect=self.engine_dialect)
+
+         expr = self._con.sql(final_query, dialect=self.engine_dialect)
+         return expr.execute()
+
+ # file = FileSource("./Wines.csv")
+ # file.connect()
+ # schema = file._build_schema()
+ # for table in schema.keys():
+ #     print(file._pretify_table(table, schema[table]))
+
+ # res = file.query("SELECT * FROM Wines;")
+ # print(len(res))
modal/rerank_service.py ADDED
@@ -0,0 +1,106 @@
+ import modal
+
+ MINUTES = 60  # seconds
+ MODEL_REPO_ID = "Qwen/Qwen3-Reranker-4B"
+
+ rerank_image = (
+     modal.Image.debian_slim(python_version="3.12")
+     .pip_install(
+         "transformers==4.51.0",
+         "huggingface_hub[hf_transfer]",
+         "fastapi[standard]",
+         "torch"
+     )
+     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
+ )
+
+ hf_cache_vol = modal.Volume.from_name("mcp-datascientist-model-weights-vol")
+
+ with rerank_image.imports():
+     import torch
+     from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ app = modal.App("qwen3-rerank-service")
+
+ @app.function(image=rerank_image, volumes={
+     "/root/.cache/huggingface": hf_cache_vol
+ })
+ def download_model():
+     from huggingface_hub import snapshot_download
+     loc = snapshot_download(repo_id=MODEL_REPO_ID)
+     print(f"Saved model to {loc}")
+
+ @app.cls(image=rerank_image, gpu="A100-40GB", volumes={
+     "/root/.cache/huggingface": hf_cache_vol
+ })
+ class RerankerService:
+
+     @modal.enter()
+     def load_model(self):
+         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO_ID, padding_side='left')
+         self.model = AutoModelForCausalLM.from_pretrained(MODEL_REPO_ID, torch_dtype=torch.float16, attn_implementation="flash_attention_2").cuda().eval()
+
+     @modal.method()
+     def rank(self, query, documents):
+         max_length = 8192
+         # The reranker is prompted to answer "yes"/"no"; the score is P("yes").
+         prefix = "<|im_start|>system\nJudge whether the Table will be useful to create an SQL request to answer the Query. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
+         suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
+         prefix_tokens = self.tokenizer.encode(prefix, add_special_tokens=False)
+         suffix_tokens = self.tokenizer.encode(suffix, add_special_tokens=False)
+         token_false_id = self.tokenizer.convert_tokens_to_ids("no")
+         token_true_id = self.tokenizer.convert_tokens_to_ids("yes")
+
+         def format_instruction(instruction, query, doc):
+             if instruction is None:
+                 instruction = 'Given a web search query, retrieve relevant passages that answer the query'
+             return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}"
+
+         def process_inputs(pairs):
+             inputs = self.tokenizer(
+                 pairs, padding=False, truncation='longest_first',
+                 return_attention_mask=False, max_length=max_length - len(prefix_tokens) - len(suffix_tokens)
+             )
+             for i, ele in enumerate(inputs['input_ids']):
+                 inputs['input_ids'][i] = prefix_tokens + ele + suffix_tokens
+             inputs = self.tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=max_length)
+             for key in inputs:
+                 inputs[key] = inputs[key].to(self.model.device)
+             return inputs
+
+         @torch.no_grad()
+         def compute_logits(inputs):
+             # Compare the logits of "yes" vs "no" at the final position and
+             # normalize them into a probability for "yes".
+             logits = self.model(**inputs).logits[:, -1, :]
+             true_vector = logits[:, token_true_id]
+             false_vector = logits[:, token_false_id]
+             batch_scores = torch.stack([false_vector, true_vector], dim=1)
+             batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
+             return batch_scores[:, 1].exp().tolist()
+
+         instruction = "Given a user query find the useful tables in order to build an SQL request"
+         pairs = [format_instruction(instruction, query, doc) for doc in documents]
+
+         inputs = process_inputs(pairs)
+         scores = compute_logits(inputs)
+
+         return scores
+
+ @app.function(
+     image=modal.Image.debian_slim(python_version="3.12")
+     .pip_install("fastapi[standard]==0.115.4")
+ )
+ @modal.asgi_app(label="rerank-endpoint")
+ def fastapi_app():
+     from fastapi import FastAPI, Request
+     from fastapi.responses import JSONResponse
+
+     web_app = FastAPI()
+
+     @web_app.post("/predict")
+     async def predict(request: Request):
+         # Smoke test: rank a hardcoded query against two sample documents.
+         output_data = RerankerService().rank.remote(
+             "What is the capital of China?",
+             [
+                 "The capital of China is Beijing.",
+                 "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
+             ],
+         )
+         return JSONResponse(content=output_data)
+
+     return web_app
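
A sketch of deploying this service and invoking it from another Python process (the app and class names come from this file; the workflow itself is an assumption):

    # modal deploy modal/rerank_service.py
    # modal run modal/rerank_service.py::download_model   # warm the weights cache
    import modal

    RerankerService = modal.Cls.from_name("qwen3-rerank-service", "RerankerService")
    scores = RerankerService().rank.remote(
        "Which wine has the least alcohol?",
        ["Table (Wines) with 2 fields: alcohol, quality",
         "Table (Players) with 2 fields: name, team"],
    )
    print(scores)  # one P("yes") relevance score per document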
modal/rerank_service_vllm.py ADDED
@@ -0,0 +1,136 @@
+ import modal
+ import logging
+
+ app = modal.App("qwen-reranker-vllm")
+ hf_cache_vol = modal.Volume.from_name("mcp-datascientist-model-weights-vol")
+ vllm_cache_vol = modal.Volume.from_name("vllm-cache")
+ MINUTES = 60  # seconds
+
+ vllm_image = (
+     modal.Image.debian_slim(python_version="3.12")
+     .pip_install(
+         "vllm==0.8.5",
+         "transformers",
+         "torch",
+         "fastapi[all]",
+         "pydantic"
+     )
+     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
+ )
+
+ with vllm_image.imports():
+     from transformers import AutoTokenizer
+     from vllm import LLM, SamplingParams
+     from vllm.inputs.data import TokensPrompt
+     import torch
+     import math
+
+ @app.cls(image=vllm_image,
+          gpu="A100-40GB",
+          scaledown_window=15 * MINUTES,  # how long should we stay up with no requests?
+          timeout=10 * MINUTES,
+          volumes={
+              "/root/.cache/huggingface": hf_cache_vol,
+              "/root/.cache/vllm": vllm_cache_vol,
+          })
+ class Reranker:
+     @modal.enter()
+     def load_reranker(self):
+         logging.info("loading the reranker model")
+         self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-Reranker-4B")
+         self.tokenizer.padding_side = "left"
+         self.tokenizer.pad_token = self.tokenizer.eos_token
+         self.model = LLM(
+             model="Qwen/Qwen3-Reranker-4B",
+             tensor_parallel_size=torch.cuda.device_count(),
+             max_model_len=10000,
+             enable_prefix_caching=True,
+             gpu_memory_utilization=0.8
+         )
+         self.suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
+         self.suffix_tokens = self.tokenizer.encode(self.suffix, add_special_tokens=False)
+         self.max_length = 8192
+         self.true_token = self.tokenizer("yes", add_special_tokens=False).input_ids[0]
+         self.false_token = self.tokenizer("no", add_special_tokens=False).input_ids[0]
+         # Generate exactly one token, restricted to "yes"/"no".
+         self.sampling_params = SamplingParams(
+             temperature=0,
+             max_tokens=1,
+             logprobs=20,
+             allowed_token_ids=[self.true_token, self.false_token],
+         )
+
+     def format_instruction(self, instruction, query, doc):
+         return [
+             {"role": "system", "content": "Judge whether the Table will be useful to create an SQL request to answer the Query. Note that the answer can only be \"yes\" or \"no\""},
+             {"role": "user", "content": f"<Instruct>: {instruction}\n\n<Query>: {query}\n\n<Document>: {doc}"}
+         ]
+
+     def process_inputs(self, pairs, instruction):
+         messages = [self.format_instruction(instruction, query, doc) for query, doc in pairs]
+         messages = self.tokenizer.apply_chat_template(
+             messages, tokenize=True, add_generation_prompt=False, enable_thinking=False
+         )
+         messages = [ele[:self.max_length] + self.suffix_tokens for ele in messages]
+         messages = [TokensPrompt(prompt_token_ids=ele) for ele in messages]
+         return messages
+
+     def compute_logits(self, messages):
+         outputs = self.model.generate(messages, self.sampling_params, use_tqdm=False)
+         scores = []
+         for i in range(len(outputs)):
+             final_logits = outputs[i].outputs[0].logprobs[-1]
+             # Fall back to a very low logprob when a token is missing from the top-k.
+             if self.true_token not in final_logits:
+                 true_logit = -10
+             else:
+                 true_logit = final_logits[self.true_token].logprob
+             if self.false_token not in final_logits:
+                 false_logit = -10
+             else:
+                 false_logit = final_logits[self.false_token].logprob
+             true_score = math.exp(true_logit)
+             false_score = math.exp(false_logit)
+             score = true_score / (true_score + false_score)
+             scores.append(score)
+         return scores
+
+     @modal.method()
+     def rerank(self, query, documents, task):
+         pairs = [(query, doc) for doc in documents]
+         inputs = self.process_inputs(pairs, task)
+         scores = self.compute_logits(inputs)
+         return [{"score": float(score), "content": doc} for score, doc in zip(scores, documents)]
+
+ @app.function(
+     image=modal.Image.debian_slim(python_version="3.12")
+     .pip_install("fastapi[standard]==0.115.4", "pydantic")
+ )
+ @modal.asgi_app(label="rerank-endpoint")
+ def fastapi_app():
+     from pydantic import BaseModel
+     from fastapi import FastAPI
+     from fastapi.responses import JSONResponse
+     from typing import List
+
+     web_app = FastAPI()
+     reranker = Reranker()
+
+     class ScoringResult(BaseModel):
+         score: float
+         content: str
+
+     class RankingRequest(BaseModel):
+         task: str
+         query: str
+         documents: List[str]
+
+     @web_app.post("/rank", response_model=List[ScoringResult])
+     async def predict(payload: RankingRequest):
+         logging.info("calling the rank function")
+         output_data = reranker.rerank.remote(payload.query, payload.documents, payload.task)
+         return JSONResponse(content=output_data)
+
+     return web_app
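
Calling the deployed /rank endpoint over HTTP, mirroring the payload rank_tables() sends in services/ai.py (the URL shape is an assumption; `modal deploy` prints the real one):

    import requests

    payload = {
        "task": "Given a user query find the useful tables in order to build an SQL request",
        "query": "Which wine has the least alcohol?",
        "documents": ["Table (Wines) with 2 fields: alcohol, quality",
                      "Table (Players) with 2 fields: name, team"],
    }
    resp = requests.post("https://<workspace>--rerank-endpoint.modal.run/rank", json=payload)
    resp.raise_for_status()
    for item in sorted(resp.json(), key=lambda x: x["score"], reverse=True):
        print(round(item["score"], 3), item["content"])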
modal/vllm_service.py ADDED
@@ -0,0 +1,62 @@
+ import modal
+
+ vllm_image = (
+     modal.Image.debian_slim(python_version="3.12")
+     .pip_install(
+         "vllm==0.7.2",
+         "transformers==4.51.0",
+         "huggingface_hub[hf_transfer]",
+         "flashinfer-python==0.2.0.post2",
+         extra_index_url="https://flashinfer.ai/whl/cu124/torch2.5",
+     )
+     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})  # faster model transfers
+ )
+ vllm_image = vllm_image.env({"VLLM_USE_V1": "1"})
+
+ hf_cache_vol = modal.Volume.from_name("mcp-datascientist-model-weights-vol")
+ vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
+
+ app = modal.App("example-vllm-openai-compatible")
+
+ N_GPU = 1  # tip: for best results, first upgrade to more powerful GPUs, and only then increase GPU count
+ API_KEY = "super-secret-key-mcp-hackathon"  # API key for auth; for production use, replace with a modal.Secret
+
+ MINUTES = 60  # seconds
+ VLLM_PORT = 8000
+
+ MODEL_NAME = "Qwen/Qwen3-14B"
+
+ @app.function(
+     image=vllm_image,
+     gpu="A100-40GB",
+     scaledown_window=15 * MINUTES,  # how long should we stay up with no requests?
+     timeout=10 * MINUTES,  # how long should we wait for container start?
+     volumes={
+         "/root/.cache/huggingface": hf_cache_vol,
+         "/root/.cache/vllm": vllm_cache_vol,
+     },
+ )
+ @modal.concurrent(
+     max_inputs=10
+ )  # how many requests can one replica handle? tune carefully!
+ @modal.web_server(port=VLLM_PORT, startup_timeout=5 * MINUTES)
+ def serve():
+     import subprocess
+
+     cmd = [
+         "vllm",
+         "serve",
+         "--uvicorn-log-level=info",
+         MODEL_NAME,
+         "--host",
+         "0.0.0.0",
+         "--port",
+         str(VLLM_PORT),
+         "--api-key",
+         API_KEY,
+     ]
+
+     subprocess.Popen(" ".join(cmd), shell=True)
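
Once deployed, the agent's OpenAI client can be pointed at this server. A sketch (the URL is an assumption; `modal deploy` prints the actual one):

    from openai import OpenAI

    client = OpenAI(
        base_url="https://<workspace>--example-vllm-openai-compatible-serve.modal.run/v1",
        api_key="super-secret-key-mcp-hackathon",  # must match API_KEY above
    )
    resp = client.chat.completions.create(
        model="Qwen/Qwen3-14B",
        messages=[{"role": "user", "content": "Say hello. /no_think"}],
    )
    print(resp.choices[0].message.content)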
requirements.txt ADDED
@@ -0,0 +1,111 @@
+ aiofiles==24.1.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.12.9
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.9.0
+ atpublic==6.0.1
+ attrs==25.3.0
+ certifi==2025.4.26
+ charset-normalizer==3.4.2
+ click==8.1.8
+ distro==1.9.0
+ duckdb==1.3.0
+ fastapi==0.115.12
+ ffmpy==0.6.0
+ filelock==3.18.0
+ frozenlist==1.6.2
+ fsspec==2025.5.1
+ gradio==5.33.0
+ gradio-client==1.10.2
+ groovy==0.1.2
+ grpclib==0.4.7
+ h11==0.16.0
+ h2==4.2.0
+ hf-xet==1.1.3
+ hpack==4.1.0
+ httpcore==1.0.9
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.32.4
+ hyperframe==6.1.0
+ ibis-framework==10.5.0
+ idna==3.10
+ jinja2==3.1.6
+ jiter==0.10.0
+ markdown-it-py==3.0.0
+ markupsafe==3.0.2
+ mcp==1.9.0
+ mdurl==0.1.2
+ modal==1.0.3
+ mpmath==1.3.0
+ multidict==6.4.4
+ mysqlclient==2.2.7
+ networkx==3.5
+ numpy==2.2.6
+ nvidia-cublas-cu12==12.6.4.1
+ nvidia-cuda-cupti-cu12==12.6.80
+ nvidia-cuda-nvrtc-cu12==12.6.77
+ nvidia-cuda-runtime-cu12==12.6.77
+ nvidia-cudnn-cu12==9.5.1.17
+ nvidia-cufft-cu12==11.3.0.4
+ nvidia-cufile-cu12==1.11.1.6
+ nvidia-curand-cu12==10.3.7.77
+ nvidia-cusolver-cu12==11.7.1.2
+ nvidia-cusparse-cu12==12.5.4.2
+ nvidia-cusparselt-cu12==0.6.3
+ nvidia-nccl-cu12==2.26.2
+ nvidia-nvjitlink-cu12==12.6.85
+ nvidia-nvtx-cu12==12.6.77
+ openai==1.84.0
+ orjson==3.10.18
+ packaging==25.0
+ pandas==2.2.3
+ parsy==2.1
+ pillow==11.2.1
+ propcache==0.3.1
+ protobuf==6.31.1
+ pyarrow==20.0.0
+ pyarrow-hotfix==0.7
+ pydantic==2.11.5
+ pydantic-core==2.33.2
+ pydantic-settings==2.9.1
+ pydub==0.25.1
+ pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.1.0
+ python-multipart==0.0.20
+ pytz==2025.2
+ pyyaml==6.0.2
+ requests==2.32.3
+ rich==14.0.0
+ ruff==0.11.13
+ safehttpx==0.1.6
+ semantic-version==2.10.0
+ setuptools==80.9.0
+ shellingham==1.5.4
+ sigtools==4.0.1
+ six==1.17.0
+ sniffio==1.3.1
+ sqlglot==26.24.0
+ sse-starlette==2.3.6
+ starlette==0.46.2
+ sympy==1.14.0
+ synchronicity==0.9.13
+ tabulate==0.9.0
+ toml==0.10.2
+ tomlkit==0.13.3
+ toolz==1.0.0
+ tqdm==4.67.1
+ triton==3.3.1
+ typer==0.16.0
+ types-certifi==2021.10.8.3
+ types-toml==0.10.8.20240310
+ typing-extensions==4.14.0
+ typing-inspection==0.4.1
+ tzdata==2025.2
+ urllib3==2.4.0
+ uvicorn==0.34.3
+ watchfiles==1.0.5
+ websockets==15.0.1
+ yarl==1.20.0
services/__init__.py ADDED
@@ -0,0 +1,24 @@
+ import re
+
+ # Note: agent.py imports generate_sql from services.ai; this earlier variant
+ # expects the reranked table dicts ({"score": ..., "content": ...}).
+ def generate_sql(client, query, tables):
+     out = "## Database tables\n"
+     for table in tables:
+         out += table.get('content')
+
+     prompt = f"Generate an SQL query to answer this question {query} \n Based on this database information \n {out} /no_think"
+     print(prompt)
+     response = client.chat.completions.create(
+         model="Qwen/Qwen3-8B",  # model name to use
+         messages=[
+             {"role": "system", "content": "You are an expert in generating SQL queries based on a given schema. You will output the generated query in <sql> </sql> tags"},
+             {"role": "user", "content": prompt}
+         ],
+         temperature=0.7
+     )
+
+     # Extract the query from the <sql> tags in the response.
+     txt = response.choices[0].message.content
+     match = re.search(r"<sql>(.*?)</sql>", txt, re.DOTALL | re.IGNORECASE)
+     if match:
+         return match.group(1).strip()
+     return None
services/ai.py ADDED
@@ -0,0 +1,116 @@
 
+ import re
+ import requests
+ import os
+
+ def rank_tables(query, tables):
+     task = "Given a user query find the useful tables in order to build an SQL request"
+     payload = {
+         "task": task,
+         "query": query,
+         "documents": tables
+     }
+
+     response = requests.post(os.getenv('RERANK_ENDPOINT'), json=payload)
+
+     if response.status_code == 200:
+         results = response.json()
+         sorted_results = sorted(results, key=lambda x: x["score"], reverse=True)
+         return sorted_results
+     else:
+         raise Exception(f"Request failed: {response.status_code} - {response.text}")
+
+ def call_llm(client, system_prompt, user_prompt, model="Qwen/Qwen3-14B", temperature=0):
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt}
+         ],
+         temperature=temperature
+     )
+     return response.choices[0].message.content
+
+ def call_llm_streaming(client, system_prompt, user_prompt,
+                        model="Qwen/Qwen3-14B", temperature=0):
+     # Start a streaming chat completion and yield each content delta.
+     stream = client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt}
+         ],
+         temperature=temperature,
+         stream=True
+     )
+     out = ""
+     for chunk in stream:
+         delta = chunk.choices[0].delta.content
+         if delta:
+             out += delta
+             yield delta
+     return out
+
+ def format_thinking(thinking):
+     # Qwen3 convention: appending "/no_think" to the prompt disables thinking mode.
+     return "" if thinking else "/no_think"
+
+ def extract_tagged_content(text, tag):
+     pattern = fr"<{tag}>(.*?)</{tag}>"
+     match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
+     return match.group(1).strip() if match else None
+
+ def generate_answer(client, sql, query, result, thinking=False):
+     prompt = (
+         f"The user asked this question: \n\"{query}\"\n"
+         f"The executed SQL was: \n\"{sql}\"\n"
+         f"This is the result: \n{result}\n"
+         f"Please give an answer to the user. "
+         f"Use a friendly tone and at the end suggest follow-up questions related to the first user question {format_thinking(thinking)}"
+     )
+     system = "You are an expert in providing responses to questions based on provided content"
+     return call_llm_streaming(client, system, prompt)
+
+ def evaluate_difficulty(client, query):
+     prompt = (
+         f"Output a grade between 0 and 10 on how difficult it is to generate an SQL query "
+         f"to answer this question:\n{query}\n/no_think"
+     )
+     system = (
+         "Your task is to evaluate the level of difficulty for generating an SQL query. "
+         "You will only output the difficulty level, which is between 0 and 10, output in <score></score> tags"
+     )
+     content = call_llm(client, system, prompt)
+     return extract_tagged_content(content, "score")
+
+ def generate_sql(client, query, tables, thinking=False):
+     schema_info = "## Database tables\n" + "\n".join(tables)
+     prompt = (
+         f"Generate an SQL query to answer this question: \"{query}\"\n"
+         f"Based on this database information:\n{schema_info} {format_thinking(thinking)}"
+     )
+     system = (
+         "You are an expert in generating SQL queries based on a given schema. "
+         "You will output the generated query in <sql></sql> tags. "
+         "Attention: you can only run one SQL query, so if you need multiple steps, you must use subqueries."
+     )
+     content = call_llm(client, system, prompt)
+     return extract_tagged_content(content, "sql")
+
+ def correct_sql(client, question, query, tables, error, thinking=True):
+     schema_info = "## Database tables\n" + "\n".join(tables)
+     prompt = (
+         f"To answer this question: \"{question}\", I tried to run this SQL query:\n{query}\n"
+         f"But I got this error:\n{error}\n"
+         f"Please take care of the provided schema and give a correct SQL to answer the question. "
+         f"Output the query in <sql></sql> tags.\n{schema_info} {format_thinking(thinking)}"
+     )
+     system = (
+         "You are an expert in generating SQL queries based on a given schema. "
+         "You will output the generated query in <sql></sql> tags."
+     )
+     content = call_llm(client, system, prompt)
+     return extract_tagged_content(content, "sql")
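
The helpers above all share one contract: the model wraps its answer in a tag, and extract_tagged_content pulls it out, returning None when the tag is missing (which callers such as agent.py's int(evaluate_difficulty(...)) should be prepared for). A tiny illustration:

    text = "<think>\n</think>\nHere you go: <sql>SELECT 1;</sql>"
    assert extract_tagged_content(text, "sql") == "SELECT 1;"
    assert extract_tagged_content(text, "score") is None  # absent tag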
services/utils.py ADDED
@@ -0,0 +1,45 @@
+ import os
+ from urllib.parse import urlparse
+
+ def filter_tables(threshold, sorted_results):
+     return [doc["content"] for doc in sorted_results if doc["score"] > threshold]
+
+ def get_db_scheme_from_uri(uri: str) -> str:
+     """
+     Given a SQLAlchemy-style connection URI, return its scheme name
+     (with any '+driver' suffix stripped).
+
+     Examples:
+         >>> get_db_scheme_from_uri("postgresql://user:pass@host/db")
+         'postgresql'
+
+         >>> get_db_scheme_from_uri("postgresql+psycopg2://user:pass@host/db")
+         'postgresql'
+
+         >>> get_db_scheme_from_uri("duckdb:///path/to/db.duckdb")
+         'duckdb'
+     """
+     parsed = urlparse(uri)
+     scheme = parsed.scheme
+     if not scheme:
+         raise ValueError(f"No scheme found in URI: {uri!r}")
+     # Strip any "+driver" suffix (e.g. "mysql+mysqldb")
+     return scheme.split("+", 1)[0]
+
+ def extract_filename(path_or_url):
+     """
+     Extract the file name from a local path or a URL.
+
+     Args:
+         path_or_url (str): The file path or URL.
+
+     Returns:
+         str: The extracted file name.
+     """
+     parsed = urlparse(path_or_url)
+     if parsed.scheme in ('http', 'https', 'ftp'):
+         return os.path.basename(parsed.path)
+     else:
+         return os.path.basename(path_or_url)