ljy5946 committed on
Commit
35634c4
·
verified ·
1 Parent(s): 4ded835

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -7,13 +7,17 @@ import torch
7
 
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
 
10
- from langchain_community.embeddings import HuggingFaceEmbeddings
11
- from langchain_community.vectorstores import Chroma
 
 
 
12
  from langchain_community.llms import HuggingFacePipeline
 
13
  from langchain.chains import RetrievalQA
14
  from langchain.prompts import PromptTemplate
15
 
16
- from build_index import main as build_index_if_needed # 需确保 build_index.py 在同目录
17
 
18
  logging.basicConfig(level=logging.INFO)
19
 
@@ -22,7 +26,7 @@ VECTOR_STORE_DIR = "./vector_store"
22
  MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"
23
  EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
24
 
25
- # 容器启动时自动构建向量库(如果还没提交 vector_store
26
  if not os.path.exists(VECTOR_STORE_DIR) or not os.listdir(VECTOR_STORE_DIR):
27
  logging.info("向量库不存在,启动自动构建……")
28
  build_index_if_needed()
@@ -72,9 +76,8 @@ prompt_template = PromptTemplate.from_template(
72
  # ─── 4. 构建 RAG 问答链(map_reduce) ───────────────────────────
73
  qa_chain = RetrievalQA.from_chain_type(
74
  llm=llm,
75
- chain_type="map_reduce", # map_reduce 避免超长
76
  retriever=retriever,
77
- chain_type_kwargs={"prompt": prompt_template},
78
  return_source_documents=True,
79
  )
80
  logging.info("✅ RAG 问答链(map_reduce)构建成功。")
@@ -122,3 +125,4 @@ if __name__ == "__main__":
122
 
123
 
124
 
 
 
7
 
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
 
10
+ # Embeddings VectorStore 用新的分包
11
+ from langchain_huggingface import HuggingFaceEmbeddings
12
+ from langchain_chroma import Chroma
13
+
14
+ # LLM 继续用 community 包里的 Pipeline
15
  from langchain_community.llms import HuggingFacePipeline
16
+
17
  from langchain.chains import RetrievalQA
18
  from langchain.prompts import PromptTemplate
19
 
20
+ from build_index import main as build_index_if_needed # 确保 build_index.py 与 app.py 同目录
21
 
22
  logging.basicConfig(level=logging.INFO)
23
 
 
26
  MODEL_NAME = "uer/gpt2-chinese-cluecorpussmall"
27
  EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
28
 
29
+ # 容器启动时自动构建向量库(如果 vector_store 目录为空)
30
  if not os.path.exists(VECTOR_STORE_DIR) or not os.listdir(VECTOR_STORE_DIR):
31
  logging.info("向量库不存在,启动自动构建……")
32
  build_index_if_needed()
 
76
  # ─── 4. 构建 RAG 问答链(map_reduce) ───────────────────────────
77
  qa_chain = RetrievalQA.from_chain_type(
78
  llm=llm,
79
+ chain_type="map_reduce", # map_reduce 自动分段、避免超长
80
  retriever=retriever,
 
81
  return_source_documents=True,
82
  )
83
  logging.info("✅ RAG 问答链(map_reduce)构建成功。")
 
125
 
126
 
127
 
128
+