Prince-2025 committed
Commit 3ec3a80 · verified · 1 Parent(s): 7b9595c

Upload 3 files

Files changed (3)
  1. Dockerfile +34 -0
  2. app.py +95 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,34 @@
+ # Use official Python 3.10 slim image
+ FROM python:3.10-slim
+
+ # Install system dependencies needed to build llama-cpp-python from source
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     cmake \
+     libopenblas-dev \
+     git \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set environment variables
+ ENV PYTHONUNBUFFERED=1 \
+     PIP_NO_CACHE_DIR=1 \
+     HF_HUB_DISABLE_SYMLINKS_WARNING=1
+
+ # Create app directory
+ WORKDIR /app
+
+ # Copy the app files
+ COPY app.py /app/app.py
+ COPY requirements.txt /app/requirements.txt
+
+ # Install Python dependencies (llama-cpp-python pinned and installed from PyPI, not from broken wheel links)
+ RUN pip install --upgrade pip && \
+     pip install llama-cpp-python==0.2.72 && \
+     pip install -r requirements.txt
+
+ # Expose FastAPI port
+ EXPOSE 7860
+
+ # Start the FastAPI app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,95 @@
+ import os
+ import ctypes
+ from fastapi import FastAPI, Request
+ from fastapi.responses import StreamingResponse
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+ import llama_cpp
+ import uvicorn
+
+ # === Suppress llama_cpp logs ===
+ @ctypes.CFUNCTYPE(None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p)
+ def silent_log_callback(level, message, user_data):
+     pass
+
+ llama_cpp.llama_log_set(silent_log_callback, None)
+
+ # === Model configuration ===
+ MODEL_REPO = "Qwen/Qwen1.5-1.8B-Chat-GGUF"
+ MODEL_FILE = "qwen1_5-1_8b-chat-q4_k_m.gguf"
+ CACHE_DIR = "/data/models"  # Use /data for Hugging Face Spaces persistent storage
+
+ # === Download model if not present ===
+ model_path = hf_hub_download(
+     repo_id=MODEL_REPO,
+     filename=MODEL_FILE,
+     cache_dir=CACHE_DIR,
+     local_dir=CACHE_DIR,
+     local_dir_use_symlinks=False
+ )
+
+ # === Load LLM ===
+ llm = Llama(
+     model_path=model_path,
+     n_ctx=4096,
+     n_batch=512,
+     n_threads=os.cpu_count(),
+     n_gpu_layers=0,  # CPU-only
+     use_mmap=True,
+     use_mlock=False,
+     verbose=False
+ )
+
+ # === Initialize FastAPI ===
+ app = FastAPI()
+
+ # === Streaming generator ===
+ def stream_chat_response(user_query: str):
+     messages = [
+         {
+             "role": "system",
+             "content": (
+                 "You are a helpful AI assistant that reads research papers, "
+                 "highlights the key points, and explains the paper in simple terms."
+             )
+         },
+         {
+             "role": "user",
+             "content": user_query
+         }
+     ]
+
+     def generate():
+         try:
+             for chunk in llm.create_chat_completion(
+                 messages=messages,
+                 temperature=0.2,
+                 top_k=50,
+                 top_p=0.9,
+                 repeat_penalty=1.1,
+                 max_tokens=512,
+                 stream=True,
+             ):
+                 if "choices" in chunk:
+                     token = chunk["choices"][0]["delta"].get("content", "")
+                     yield f"data: {token}\n\n"
+         except Exception as e:
+             yield f"data: [ERROR] {str(e)}\n\n"
+
+     return StreamingResponse(generate(), media_type="text/event-stream")
+
+ # === Root ===
+ @app.get("/")
+ def read_root():
+     return {"message": "LLM is ready. Send POST to /chat"}
+
+ # === POST endpoint to chat with paper ===
+ @app.post("/chat")
+ async def chat_with_llm(request: Request):
+     data = await request.json()
+     user_query = data.get("query", "")
+     return stream_chat_response(user_query)
+
+ # === Run the app (required for HF Spaces) ===
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
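A minimal client sketch for the /chat endpoint, assuming the server is reachable at localhost:7860 and that the `requests` package is installed (it is not listed in requirements.txt):

import requests

# Hypothetical local URL; on Spaces, substitute the public Space URL.
resp = requests.post(
    "http://localhost:7860/chat",
    json={"query": "Summarize the key points of this paper."},
    stream=True,
)
# The server emits Server-Sent Events: lines of the form "data: <token>",
# with a blank line between events.
for line in resp.iter_lines(decode_unicode=True):
    if line.startswith("data: "):
        print(line[len("data: "):], end="", flush=True)
print()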
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ fastapi
+ uvicorn
+ huggingface_hub