Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,13 +7,17 @@ import torch
|
|
7 |
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
9 |
|
10 |
-
|
11 |
-
from
|
|
|
|
|
|
|
12 |
from langchain_community.llms import HuggingFacePipeline
|
|
|
13 |
from langchain.chains import RetrievalQA
|
14 |
from langchain.prompts import PromptTemplate
|
15 |
|
16 |
-
from build_index import main as build_index_if_needed #
|
17 |
|
18 |
logging.basicConfig(level=logging.INFO)
|
19 |
|
@@ -22,7 +26,7 @@ VECTOR_STORE_DIR = "./vector_store"
|
|
22 |
MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"
|
23 |
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
|
24 |
|
25 |
-
#
|
26 |
if not os.path.exists(VECTOR_STORE_DIR) or not os.listdir(VECTOR_STORE_DIR):
|
27 |
logging.info("向量库不存在,启动自动构建……")
|
28 |
build_index_if_needed()
|
@@ -72,9 +76,8 @@ prompt_template = PromptTemplate.from_template(
|
|
72 |
# ─── 4. 构建 RAG 问答链(map_reduce) ───────────────────────────
|
73 |
qa_chain = RetrievalQA.from_chain_type(
|
74 |
llm=llm,
|
75 |
-
chain_type="map_reduce",
|
76 |
retriever=retriever,
|
77 |
-
chain_type_kwargs={"prompt": prompt_template},
|
78 |
return_source_documents=True,
|
79 |
)
|
80 |
logging.info("✅ RAG 问答链(map_reduce)构建成功。")
|
@@ -122,3 +125,4 @@ if __name__ == "__main__":
|
|
122 |
|
123 |
|
124 |
|
|
|
|
7 |
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
9 |
|
10 |
+
# Embeddings 与 VectorStore 用新的分包
|
11 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
12 |
+
from langchain_chroma import Chroma
|
13 |
+
|
14 |
+
# LLM 继续用 community 包里的 Pipeline
|
15 |
from langchain_community.llms import HuggingFacePipeline
|
16 |
+
|
17 |
from langchain.chains import RetrievalQA
|
18 |
from langchain.prompts import PromptTemplate
|
19 |
|
20 |
+
from build_index import main as build_index_if_needed # 确保 build_index.py 与 app.py 同目录
|
21 |
|
22 |
logging.basicConfig(level=logging.INFO)
|
23 |
|
|
|
26 |
MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"
|
27 |
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
|
28 |
|
29 |
+
# 容器启动时自动构建向量库(如果 vector_store 目录不存在或为空)
|
30 |
if not os.path.exists(VECTOR_STORE_DIR) or not os.listdir(VECTOR_STORE_DIR):
|
31 |
logging.info("向量库不存在,启动自动构建……")
|
32 |
build_index_if_needed()
|
|
|
76 |
# ─── 4. 构建 RAG 问答链(map_reduce) ───────────────────────────
|
77 |
qa_chain = RetrievalQA.from_chain_type(
|
78 |
llm=llm,
|
79 |
+
chain_type="map_reduce", # map_reduce 对每个检索到的文档分别调用 LLM,再合并各段结果,避免单次提示超长
|
80 |
retriever=retriever,
|
|
|
81 |
return_source_documents=True,
|
82 |
)
|
83 |
logging.info("✅ RAG 问答链(map_reduce)构建成功。")
|
|
|
125 |
|
126 |
|
127 |
|
128 |
+
|