update
Files changed:
- app.py +1 -1
- requirements.txt +2 -1
- utils.py +1 -1
app.py CHANGED
@@ -38,7 +38,7 @@ generate_kwargs = dict(
 llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID = load_llama_guard("meta-llama/Llama-Guard-3-1B")
 
 ## RAG MODEL
-RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
+RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert", n_gpu=1)
 
 try:
     gr.Info("Setting up retriever, please wait...")
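The only change here is the n_gpu=1 argument, which asks RAGatouille to run ColBERT index search on one GPU instead of the CPU. A minimal usage sketch of the loaded retriever; the query string and k are illustrative, while from_index() and search() are RAGatouille's documented API:

from ragatouille import RAGPretrainedModel

# Load the prebuilt ColBERT index onto a single GPU (mirrors the diff).
RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert", n_gpu=1)

# search() returns a ranked list of dicts; "content" and "score" are among
# the documented result fields (query text and k here are illustrative).
results = RAG.search(query="What is retrieval-augmented generation?", k=3)
for hit in results:
    print(f'{hit["score"]:.2f}  {hit["content"][:80]}')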
requirements.txt CHANGED
@@ -12,4 +12,5 @@ arxiv
 ragatouille
 hf_transfer
 colorlog
-accelerate==1.1.1
+accelerate==1.1.1
+faiss-gpu
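faiss-gpu supplies the CUDA build of FAISS, which ColBERT relies on for index search; it pairs with the new n_gpu=1 argument in app.py. A quick sanity check that the GPU wheel is the one installed (assuming a CUDA device is visible; the CPU-only wheel typically does not ship the GPU symbol, hence the guarded lookup):

import faiss

# In the GPU build, get_num_gpus() reports visible CUDA devices; fall
# back to 0 if the symbol is absent (CPU-only build).
num_gpus = getattr(faiss, "get_num_gpus", lambda: 0)()
print(f"FAISS sees {num_gpus} GPU(s)")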
utils.py CHANGED
@@ -188,7 +188,7 @@ def moderate(chat, model, tokenizer, UNSAFE_TOKEN_ID):
     ######
     # Get generated text
     ######
-    logger.info(outputs)
+    # logger.info(outputs)
     # Number of tokens that correspond to the input prompt
     input_length = inputs.input_ids.shape[1]
     # Ignore the tokens from the input to get the tokens generated by the model
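This change only comments out a noisy dump of the raw generate() output tensor. For reference, a self-contained sketch of the prompt-stripping pattern the surrounding comments describe, using standard transformers calls; the model name comes from app.py and is gated, so any causal LM works in its place:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Any causal LM works here; Llama-Guard-3-1B (from app.py) is gated.
model_name = "meta-llama/Llama-Guard-3-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)

# Number of tokens that correspond to the input prompt
input_length = inputs.input_ids.shape[1]

# Ignore the tokens from the input to get the tokens generated by the model
generated = tokenizer.decode(outputs[0, input_length:], skip_special_tokens=True)
print(generated)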