hungchiayu1 committed
Commit 12dd7f1 · 1 Parent(s): aa2596d
README.md DELETED
@@ -1,14 +0,0 @@
- ---
- title: Trust-Align
- emoji: 🔥
- colorFrom: blue
- colorTo: blue
- sdk: gradio
- sdk_version: 4.37.2
- app_file: app.py
- pinned: false
- short_description: Trust-Align
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
- Index Last Updated : 2024-11-16
 
__pycache__/utils.cpython-310.pyc DELETED
Binary file (9.06 kB)
 
app.py DELETED
@@ -1,249 +0,0 @@
1
- # import os
2
- # # Set CUDA device dynamically
3
- # os.environ["CUDA_VISIBLE_DEVICES"] = ""
4
-
5
- import spaces
6
- import torch
7
- import transformers
8
- import gradio as gr
9
- from ragatouille import RAGPretrainedModel
10
- from huggingface_hub import InferenceClient
11
- import re
12
- from datetime import datetime
13
- import json
14
- import os
15
- import arxiv
16
- from utils import get_md_text_abstract, search_cleaner, get_arxiv_live_search, make_demo, make_doc_prompt, load_llama_guard, moderate, LLM
17
-
18
- global MODEL, CURRENT_MODEL
19
- MODEL, CURRENT_MODEL = None, None
20
-
21
- retrieve_results = 10
22
- show_examples = True
23
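- # Models selectable in the UI; "Trust-Align-Qwen2.5" resolves to the checkpoint path stored in the MODEL_NAME environment variable.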
- llm_models_to_choose = ['Trust-Align-Qwen2.5', "meta-llama/Meta-Llama-3-8B-Instruct",'None']
24
- llm_location_map={
25
- "Trust-Align-Qwen2.5": os.getenv("MODEL_NAME"),
26
- "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct", # "Qwen/Qwen2.5-7B-Instruct"
27
- "None": None
28
- }
29
-
30
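- # Default decoding settings; ask_llm() below only passes max_new_tokens through to MODEL.generate().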
- generate_kwargs = dict(
31
- temperature = 0.1,
32
- max_new_tokens = 512,
33
- top_p = 1.0,
34
- do_sample = True,
35
- )
36
-
37
- # Load llama Guard
38
- # llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID = load_llama_guard("meta-llama/Llama-Guard-3-1B")
39
-
40
- ## RAG MODEL
41
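- # Touch the GPU once so a missing CUDA device fails fast before the ColBERT index is loaded.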
- test_tensor = torch.tensor(2).to('cuda')
42
- RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert", n_gpu=1)
43
-
44
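- # Warm-up query so retriever problems surface at startup.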
- try:
45
- gr.Info("Setting up retriever, please wait...")
46
- rag_initial_output = RAG.search("what is Mistral?", k = 1)
47
- gr.Info("Retriever working successfully!")
48
-
49
- except Exception:
50
- gr.Warning("Retriever not working!")
51
-
52
- def choose_llm(choosed_llm):
53
- global MODEL, CURRENT_MODEL
54
- try:
55
- gr.Info("Setting up LLM, please wait...")
56
- MODEL = LLM(llm_location_map[choosed_llm], use_vllm=False)
57
- CURRENT_MODEL = choosed_llm
58
- gr.Info("LLM working successfully!")
59
- except Exception as e:
60
- raise RuntimeError("Failed to load the LLM MODEL.") from e
61
-
62
- choose_llm(llm_models_to_choose[0])
63
-
64
- # prompt used for generation
65
- try:
66
- with open("rejection_full.json") as f:
67
- prompt_data = json.load(f)
68
- except FileNotFoundError:
69
- raise RuntimeError("Prompt data file 'rejection_full.json' not found.")
70
- except json.JSONDecodeError:
71
- raise RuntimeError("Failed to decode 'rejection_full.json'.")
72
-
73
- ## Header
74
- mark_text = '# 🔍 Search Results\n'
75
- header_text = "# 🤖 Trust-Align: Measuring and Enhancing Trustworthiness of LLMs in RAG through Grounded Attributions and Learning to Refuse\n \n"
76
-
77
- try:
78
- with open("README.md", "r") as f:
79
- mdfile = f.read()
80
- date_pattern = r'Index Last Updated : \d{4}-\d{2}-\d{2}'
81
- match = re.search(date_pattern, mdfile)
82
- date = match.group().split(': ')[1]
83
- formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
84
- header_text += f'Index Last Updated: {formatted_date}\n'
85
- index_info = f"Semantic Search - up to {formatted_date}"
86
- except Exception:
87
- index_info = "Semantic Search"
88
-
89
- database_choices = [index_info,'Arxiv Search - Latest - (EXPERIMENTAL)']
90
-
91
- ## Arxiv API
92
- arx_client = arxiv.Client()
93
- is_arxiv_available = True
94
- check_arxiv_result = get_arxiv_live_search("What is Mistral?", arx_client, retrieve_results)
95
- if len(check_arxiv_result) == 0:
96
- is_arxiv_available = False
97
- print("Arxiv search not working, switching to default search ...")
98
- database_choices = [index_info]
99
-
100
-
101
-
102
- ## Example placeholders (controlled by show_examples)
103
- if show_examples:
104
- with open("sample_outputs.json", "r") as f:
105
- sample_outputs = json.load(f)
106
- output_placeholder = sample_outputs['output_placeholder']
107
- md_text_initial = sample_outputs['search_placeholder']
108
-
109
- else:
110
- output_placeholder = None
111
- md_text_initial = ''
112
-
113
-
114
- def rag_cleaner(inp):
115
- rank = inp['rank']
116
- title = inp['document_metadata']['title']
117
- content = inp['content']
118
- date = inp['document_metadata']['_time']
119
- return f"{rank}. <b> {title} </b> \n Date : {date} \n Abstract: {content}"
120
-
121
- def get_references(question, retriever, k = retrieve_results):
122
- rag_out = retriever.search(query=question, k=k)
123
- return rag_out
124
-
125
- def get_rag(message):
126
- return get_references(message, RAG)
127
-
128
- with gr.Blocks(theme = gr.themes.Soft()) as demo:
129
- header = gr.Markdown(header_text)
130
-
131
- with gr.Group():
132
- msg = gr.Textbox(label = 'Search', placeholder = 'What is Mistral?')
133
-
134
- with gr.Accordion("Advanced Settings", open=False):
135
- with gr.Row(equal_height = True):
136
- llm_model = gr.Dropdown(choices = llm_models_to_choose, value = 'Trust-Align-Qwen2.5', label = 'LLM MODEL')
137
- llm_results = gr.Slider(minimum=1, maximum=retrieve_results, value=3, step=1, interactive=True, label="Top n results as context")
138
- database_src = gr.Dropdown(choices = database_choices, value = database_choices[-1], label = 'Search Source')
139
- stream_results = gr.Checkbox(value = True, label = "Stream output", visible = False)
140
-
141
- output_text = gr.Textbox(show_label = True, container = True, label = 'LLM Answer', visible = True, placeholder = output_placeholder)
142
- input = gr.Textbox(visible=False) # placeholder
143
- gr_md = gr.Markdown(mark_text + md_text_initial)
144
-
145
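-     # Retrieve documents for the query, render them as search results, and build the prompt for the LLM.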
- # @spaces.GPU(duration=60)
146
- def update_with_rag_md(message, llm_results_use = 5, database_choice = index_info, llm_model_picked = 'Trust-Align-Qwen2.5'):
147
- # chat_round = [
148
- # {"role": "user",
149
- # "content": [
150
- # {"type": "text",
151
- # "text": message
152
- # }
153
- # ]
154
- # }
155
- # ]
156
- # llama guard check for it
157
- # prompt_safety = moderate(chat_round, llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID)['generated_text']
158
- prompt_safety = "safe"
159
-
160
- if prompt_safety == "safe":
161
- docs = []
162
- database_to_use = database_choice
163
- if database_choice == index_info:
164
- rag_out = get_rag(message)
165
- else:
166
- arxiv_search_success = True
167
- try:
168
- rag_out = get_arxiv_live_search(message, arx_client, retrieve_results)
169
- if len(rag_out) == 0:
170
- arxiv_search_success = False
171
-                 except Exception:
172
- arxiv_search_success = False
173
-
174
- if not arxiv_search_success:
175
- gr.Warning("Arxiv Search not working, switching to semantic search ...")
176
- rag_out = get_rag(message)
177
- database_to_use = index_info
178
-
179
- md_text_updated = mark_text
180
-             for i in range(min(retrieve_results, len(rag_out))):
181
- rag_answer = rag_out[i]
182
- if i < llm_results_use:
183
- md_text_paper, doc = get_md_text_abstract(rag_answer, source = database_to_use, return_prompt_formatting = True)
184
- docs.append(doc)
185
- md_text_paper = md_text_paper.strip("###")
186
- md_text_updated += f"### [{i+1}] {md_text_paper}"
187
- # else:
188
- # md_text_paper = get_md_text_abstract(rag_answer, source = database_to_use)
189
- # md_text_updated += md_text_paper
190
-
191
- infer_item = {
192
- "question": message,
193
- "docs": docs,
194
- }
195
- prompt = make_demo(
196
- infer_item,
197
- prompt=prompt_data["demo_prompt"],
198
- ndoc=llm_results_use,
199
- doc_prompt=prompt_data["doc_prompt"],
200
- instruction=prompt_data["instruction"],
201
- test=True
202
- )
203
- else:
204
- md_text_updated = mark_text + "### Invalid search query!"
205
- prompt = ""
206
-
207
- return md_text_updated, prompt
208
-
209
-
210
-     @spaces.GPU(duration=60)
-     def ask_llm(prompt, llm_model_picked = 'Trust-Align-Qwen2.5', stream_outputs = False):
-         model_disabled_text = "LLM MODEL is disabled"
-         output = ""
-
-         if llm_model_picked == 'None':
-             if stream_outputs:
-                 for out in model_disabled_text:
-                     output += out
-                     yield output
-             else:
-                 yield model_disabled_text
-             return
-
-         global MODEL
-         if llm_model_picked != CURRENT_MODEL:
-             # Free the previously loaded model before switching checkpoints
-             del MODEL
-             import gc
-             gc.collect()
-             torch.cuda.empty_cache()
-             choose_llm(llm_model_picked)
-
-         try:
-             generation = MODEL.generate(prompt, generate_kwargs["max_new_tokens"])
-         except Exception:
-             gr.Warning("LLM Inference rate limit reached, try again later!")
-             yield ""
-             return
-
-         if stream_outputs:
-             # MODEL.generate returns the full answer; emit it character by character for a streaming effect
-             for response in generation:
-                 output += response
-                 yield output
-         else:
-             yield generation
245
-
246
-
247
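-     # On submit: run retrieval and prompt construction, then stream the LLM answer into the answer box.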
- msg.submit(update_with_rag_md, [msg, llm_results, database_src, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)
248
-
249
- demo.queue().launch()
 
rejection_full.json DELETED
@@ -1,184 +0,0 @@
1
- {
2
- "instruction": "Instruction: Write an accurate, engaging, and concise answer for the given question using only the provided search results (some of which might be irrelevant) and cite them properly. Use an unbiased and journalistic tone. Always cite for any factual claim. When citing several search results, use [1][2][3]. Cite at least one document and at most three documents in each sentence. If multiple documents support the sentence, only cite a minimum sufficient subset of the documents. If none of the provided documents contain the answer, only respond with \"I apologize, but I couldn't find an answer to your question in the search results.\". Then, add further explanation as to why an answer cannot be provided.",
3
- "demo_sep": "\n\n\n",
4
- "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
5
- "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
6
- "positive_demos": [
7
- {
8
- "question": "Which is the most rainy place on earth?",
9
- "answer": "Several places on Earth claim to be the most rainy, such as Lloró, Colombia, which reported an average annual rainfall of 12,717 mm between 1952 and 1989, and López de Micay, Colombia, which reported an annual 12,892 mm between 1960 and 2012 [3]. However, the official record is held by Mawsynram, India with an average annual rainfall of 11,872 mm [3], although nearby town Sohra, India, also known as Cherrapunji, holds the record for most rain in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861 [1].",
10
- "docs": [
11
- {
12
- "title": "Cherrapunji",
13
- "text": "Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861, however: it received in"
14
- },
15
- {
16
- "title": "Cherrapunji",
17
- "text": "Radio relay station known as Akashvani Cherrapunji. It broadcasts on FM frequencies. Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall"
18
- },
19
- {
20
- "title": "Mawsynram",
21
- "text": "Mawsynram Mawsynram () is a village in the East Khasi Hills district of Meghalaya state in north-eastern India, 65 kilometres from Shillong. Mawsynram receives one of the highest rainfalls in India. It is reportedly the wettest place on Earth, with an average annual rainfall of 11,872 mm, but that claim is disputed by Lloró, Colombia, which reported an average yearly rainfall of 12,717 mm between 1952 and 1989 and López de Micay, also in Colombia, which reported an annual 12,892 mm per year between 1960 and 2012. According to the \"Guinness Book of World Records\", Mawsynram received of rainfall in 1985. Mawsynram is located at 25° 18′"
22
- },
23
- {
24
- "title": "Earth rainfall climatology",
25
- "text": "Pacific Northwest, and the Sierra Nevada range are the wetter portions of the nation, with average rainfall exceeding per year. The drier areas are the Desert Southwest, Great Basin, valleys of northeast Arizona, eastern Utah, central Wyoming, eastern Oregon and Washington and the northeast of the Olympic Peninsula. The Big Bog on the island of Maui receives, on average, every year, making it the wettest location in the US, and all of Oceania. The annual average rainfall maxima across the continent lie across the northwest from northwest Brazil into northern Peru, Colombia, and Ecuador, then along the Atlantic coast of"
26
- },
27
- {
28
- "title": "Going to Extremes",
29
- "text": "in the world. Oymyakon in Siberia, where the average winter temperature is −47 °F (− 44 °C). Arica in Chile, where there had been fourteen consecutive years without rain. Fog is the only local source of water. Mawsynram in India, where average annual rainfall is 14 meters, falling within a four-month period in the monsoon season. The rainfall is approximately equal to that of its neighbor Cherrapunji. Dallol in Ethiopia, known as the 'Hell-hole of creation' where the temperature averages 94 °F (34 °C) over the year. In his second series, Middleton visited places without permanent towns, locations where \"survival\""
30
- }
31
- ]
32
- },
33
- {
34
- "question": "When did the us break away from england?",
35
- "answer": "The United States took the first step towards gaining independence from Great Britain when it declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, 1776, the date when the Declaration of Independence was officially adopted by Congress) [2]. The Treaty of Paris was later signed on September 3, 1783, formally separating the United States from the British Empire [3].",
36
- "docs": [
37
- {
38
- "title": "United States withdrawal from Saudi Arabia",
39
- "text": "United States withdrawal from Saudi Arabia Beginning during Operation Desert Shield in August 1990, while preparing for the Gulf War, the United States sent a large troop contingent to Saudi Arabia. After the war, remnant troops, primarily U.S. Air Force personnel, augmented by a smaller number of coordinating and training personnel from the U.S. Navy, U.S. Army and U.S. Marine Corps remained in Saudi Arabia under the aegis of Joint Task Force Southwest Asia (JTF-SWA), as part of Operation Southern Watch (OSW). The United Kingdom and France also maintained a small contingent of Royal Air Force and French Air Force"
40
- },
41
- {
42
- "title": "Decolonization of the Americas",
43
- "text": "and France has fully \"integrated\" most of its former colonies as fully constituent \"departments\" of France. The United States of America declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, the date when the Declaration of Independence was officially adopted by Congress), in so doing becoming the first independent, foreign-recognized nation in the Americas and the first European colonial entity to break from its mother country. Britain formally acknowledged American independence in 1783 after its defeat in the American Revolutionary War. Although initially occupying only the land east of the Mississippi"
44
- },
45
- {
46
- "title": "American Revolution",
47
- "text": "second British army at Yorktown in the fall of 1781, effectively ending the war. The Treaty of Paris was signed September 3, 1783, formally ending the conflict and confirming the new nation's complete separation from the British Empire. The United States took possession of nearly all the territory east of the Mississippi River and south of the Great Lakes, with the British retaining control of Canada and Spain taking Florida. Among the significant results of the revolution was the creation of the United States Constitution, establishing a relatively strong federal national government that included an executive, a national judiciary, and"
48
- },
49
- {
50
- "title": "Decolonization",
51
- "text": "accelerate decolonialization and bring an end to the colonial empires of its Western allies, most importantly during the 1956 Suez Crisis, but American military bases were established around the world and direct and indirect interventions continued in Korea, Indochina, Latin America (\"inter alia\", the 1965 occupation of the Dominican Republic), Africa, and the Middle East to oppose Communist invasions and insurgencies. Since the dissolution of the Soviet Union, the United States has been far less active in the Americas, but invaded Afghanistan and Iraq following the September 11 attacks in 2001, establishing army and air bases in Central Asia. Before"
52
- },
53
- {
54
- "title": "Decolonization",
55
- "text": "the responsibility of the United Kingdom (with a copy of the new constitution annexed), and finally, if approved, issuance of an Order of Council fixing the exact date of independence. After World War I, several former German and Ottoman territories in the Middle East, Africa, and the Pacific were governed by the UK as League of Nations mandates. Some were administered directly by the UK, and others by British dominions – Nauru and the Territory of New Guinea by Australia, South West Africa by the Union of South Africa, and Western Samoa by New Zealand. Egypt became independent in 1922,"
56
- }
57
- ]
58
- }
59
- ],
60
- "reject_demos": [
61
- {
62
- "question": "Who set the record for longest field goal?",
63
- "answer": "I apologize, but I couldn't find an answer to your question in the search results.",
64
- "docs": [
65
- {
66
- "id": "5758609",
67
- "title": "Sebastian Janikowski",
68
- "text": "have broken the all-time NFL field goal record of 63 yards. However, it bounced off the right upright and came back out. On September 28, 2008, Janikowski unsuccessfully attempted a 76-yard field goal against the San Diego Chargers into the heavy wind right before halftime. This is presumed to be the longest attempt in NFL history; though the league keeps no such records on attempts, the longest known attempts previous to this were 74 yard attempts by Mark Moseley and Joe Danelo in 1979. On October 19, 2008, Janikowski broke his own Raiders team record, making a 57-yard field goal",
69
- "score": 0.78466796875,
70
- "answers_found": [
71
- 0,
72
- 0
73
- ],
74
- "rec_score": 0.0
75
- },
76
- {
77
- "id": "12183799",
78
- "title": "Dirk Borgognone",
79
- "text": "Dirk Borgognone Dirk Ronald Borgognone (born January 9, 1968) is a former National Football League placekicker who currently holds the record for the longest field goal ever kicked in the history of high school football, 68 yards. Borgognone attended Reno High School, initially playing as a soccer player. He soon switched to football and was trained in a \"straight-on\" kicking style. On September 27, 1985, he kicked the longest field goal in high school football history, during a Reno High School game at Sparks High School. The kick measured 68 yards and was longer than any that had ever been",
80
- "score": 0.7822265625,
81
- "answers_found": [
82
- 0,
83
- 0
84
- ],
85
- "rec_score": 0.0
86
- },
87
- {
88
- "id": "6048593",
89
- "title": "Russell Erxleben",
90
- "text": "against the University of Colorado. Despite early troubles in the game, including a missed field goal and a blocked extra point, Erxleben kicked a field goal late in the game, breaking the tie and securing the win for Texas. In 1977, in a game against Rice University, he set the record for the longest field goal in NCAA history with a 67-yard kick. UT head coach Fred Akers said of the kick, \"It was like a gunshot. We couldn't believe a ball was going that far. It had another eight yards on it.\" Erxleben kicked two other field goals over",
91
- "score": 0.7705078125,
92
- "answers_found": [
93
- 0,
94
- 0
95
- ],
96
- "rec_score": 0.0
97
- },
98
- {
99
- "id": "9303115",
100
- "title": "Field goal",
101
- "text": "to Dempsey's 1970 kick, the longest field goal in NFL history was 56 yards, by Bert Rechichar of the Baltimore Colts A 55-yard field goal, achieved by a drop kick, was recorded by Paddy Driscoll in , and stood as the unofficial record until that point; some sources indicate a 54-yarder by Glenn Presnell in as the due to the inability to precisely verify Driscoll's kick. In a pre-season NFL game between the Denver Broncos and the Seattle Seahawks on August 29, 2002, Ola Kimrin kicked a 65-yard field goal. However, because pre-season games are not counted toward official records,",
102
- "score": 0.76513671875,
103
- "answers_found": [
104
- 0,
105
- 0
106
- ],
107
- "rec_score": 0.0
108
- },
109
- {
110
- "id": "4853018",
111
- "title": "Steve Christie",
112
- "text": "Canton. He then went on to kick five field goals in the Bills 29-10 win over the Miami Dolphins in the AFC championship game, helping Buffalo get to their third consecutive Super Bowl. In 1993, Christie set a Bills record by kicking a 59-yard field goal in a regular season game. It was only four yards short of the all-time NFL record. In Super Bowl XXVIII, Christie set a Super Bowl record by kicking a 54-yard field goal. It is currently the longest field goal ever made in Super Bowl History. In the 2000 season, Christie was an instrumental part",
113
- "score": 0.75341796875,
114
- "answers_found": [
115
- 0,
116
- 0
117
- ],
118
- "rec_score": 0.0
119
- }
120
- ]
121
- },
122
- {
123
- "question": "Who played galen in planet of the apes?",
124
- "answer": "I apologize, but I couldn't find an answer to your question in the search results.",
125
- "docs": [
126
- {
127
- "id": "12677620",
128
- "title": "Planet of the Apes (2001 film)",
129
- "text": "be the Lincoln Memorial, only to find that it is now a monument to General Thade. A swarm of police officers, firefighters, and news reporters descend on Leo, all of whom are apes. Small roles include David Warner (Senator Sandar), Lisa Marie (Nova), Erick Avari (Tival), Luke Eberl (Birn), Evan Parke (Gunnar), Glenn Shadix (Senator Nado), Freda Foh Shen (Bon), Chris Ellis (Lt. Gen. Karl Vasich) and Anne Ramsay (Lt. Col. Grace Alexander). There are also cameo appearances by Charlton Heston (uncredited) as Zaius, Thade's father, and Linda Harrison (the woman in the cart). Both participated in two original films",
130
- "score": 0.7529296875,
131
- "answers_found": [
132
- 0,
133
- 0
134
- ],
135
- "rec_score": 0.0
136
- },
137
- {
138
- "id": "3943319",
139
- "title": "Severn Darden",
140
- "text": "Severn Darden Severn Teakle Darden Jr. (November 9, 1929 \u2013 May 27, 1995) was an American comedian and actor, and an original member of The Second City Chicago-based comedy troupe as well as its predecessor, the Compass Players. He is perhaps best known from his film appearances for playing the human leader Kolp in the fourth and fifth \"Planet of the Apes\" films. Born in New Orleans, Louisiana, he attended the University of Chicago. Darden\u2019s offbeat and intellectual sense of humor, appropriate for someone who attended the University of Chicago and in fact a major element in the style of",
141
- "score": 0.74267578125,
142
- "answers_found": [
143
- 0,
144
- 0
145
- ],
146
- "rec_score": 0.0
147
- },
148
- {
149
- "id": "13813715",
150
- "title": "Planet of the Apes",
151
- "text": "film stars Mark Wahlberg as astronaut Leo Davidson, who accidentally travels through a wormhole to a distant planet where talking apes enslave humans. He leads a human revolt and upends ape civilization by discovering that the apes evolved from the normal earth primates who had accompanied his mission, and arrived years before. Helena Bonham Carter played chimpanzee Ari, while Tim Roth played the human-hating chimpanzee General Thade. The film received mixed reviews; most critics believed it failed to compare to the original. Much of the negative commentary focused on the confusing plot and twist ending, though many reviewers praised the",
152
- "score": 0.74169921875,
153
- "answers_found": [
154
- 0,
155
- 0
156
- ],
157
- "rec_score": 0.0
158
- },
159
- {
160
- "id": "3386349",
161
- "title": "Maurice Evans (actor)",
162
- "text": "Maurice Evans (actor) Maurice Herbert Evans (3 June 1901 \u2013 12 March 1989) was a British actor, noted for his interpretations of Shakespearean characters. His best-known screen roles are Dr. Zaius in the 1968 film \"Planet of the Apes\" and as Samantha Stephens's father, Maurice, on \"Bewitched\". Evans was born at 28 Icen Way (where there is now a memorial plaque, unveiled in 2013 by Tegen Evans, his great-great niece) in Dorchester, Dorset. He was the son of Laura (Turner) and Alfred Herbert Evans, a Welsh dispensing chemist and keen amateur actor who made adaptations of novels by Thomas Hardy",
163
- "score": 0.734375,
164
- "answers_found": [
165
- 0,
166
- 0
167
- ],
168
- "rec_score": 0.0
169
- },
170
- {
171
- "id": "823444",
172
- "title": "Ricardo Montalba\u0301n",
173
- "text": "was played by Andy Garc\u00eda. Ricardo Montalb\u00e1n Ricardo Gonzalo Pedro Montalb\u00e1n y Merino, (; ; November 25, 1920 \u2013 January 14, 2009) was a Mexican actor. His career spanned seven decades, during which he became known for many different performances in a variety of genres, from crime and drama to musicals and comedy. Among his notable roles was Armando in the \"Planet of the Apes\" film series from the early 1970s, where he starred in \"Escape from the Planet of the Apes\" (1971) and \"Conquest of the Planet of the Apes\" (1972). Ricardo Montalb\u00e1n played Mr. Roarke on the television",
174
- "score": 0.7314453125,
175
- "answers_found": [
176
- 0,
177
- 0
178
- ],
179
- "rec_score": 0.0
180
- }
181
- ]
182
- }
183
- ]
184
- }
 
requirements.txt DELETED
@@ -1,16 +0,0 @@
- gradio==5.5.0
- spaces==0.30.3
- PyMuPDF==1.24.14
- llama-index==0.12.1
- llama-index-vector-stores-faiss==0.3.0
- chromadb==0.5.20
- llama-index-vector-stores-chroma==0.4.0
- llama-index-embeddings-huggingface==0.4.0
- vllm==0.6.2
- sentence-transformers==2.7.0
- arxiv
- ragatouille
- hf_transfer
- colorlog
- accelerate==1.1.1
- faiss-gpu
 
sample_outputs.json DELETED
@@ -1 +0,0 @@
- {"search_placeholder": "### 10 Oct 2023 | [Mistral 7B](https://arxiv.org/abs/2310.06825) | [\u2b07\ufe0f](https://arxiv.org/pdf/2310.06825)\n*Albert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, L'elio Renard Lavaud, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timoth'ee Lacroix, William El Sayed* \n\nWe introduce Mistral 7B v0.1, a 7-billion-parameter language model engineered\nfor superior performance and efficiency. Mistral 7B outperforms Llama 2 13B\nacross all evaluated benchmarks, and Llama 1 34B in reasoning, mathematics, and\ncode generation. Our model leverages grouped-query attention (GQA) for faster\ninference, coupled with sliding window attention (SWA) to effectively handle\nsequences of arbitrary length with a reduced inference cost. We also provide a\nmodel fine-tuned to follow instructions, Mistral 7B -- Instruct, that surpasses\nthe Llama 2 13B -- Chat model both on human and automated benchmarks. Our\nmodels are released under the Apache 2.0 license.\n", "output_placeholder": "Mistral is a 7-billion-parameter language model engineered for superior performance and efficiency. It was introduced in the paper \"Mistral 7B: A Superior Large Language Model\" [1]. Mistral outperforms other language models like Llama 2 13B and Llama 1 34B in various benchmarks, including reasoning, mathematics, and code generation. The model uses grouped-query attention (GQA) and sliding window attention (SWA) for faster inference and handling sequences of arbitrary length with reduced inference cost. Additionally, a fine-tuned version of Mistral, Mistral 7B -- Instruct, was released, which surpasses Llama 2 13B -- Chat model on human and automated benchmarks [1]. \n[1] Mistral 7B: A Superior Large Language Model. (2023). Retrieved from https://arxiv.org/abs/2303.14311."}
 
 
utils.py DELETED
@@ -1,327 +0,0 @@
1
- import spaces
2
- import datetime
3
- import string
4
-
5
- import nltk
6
-
7
- nltk.download('stopwords')
8
- from nltk.corpus import stopwords
9
-
10
- stop_words = stopwords.words('english')
11
- import time
12
-
13
- import arxiv
14
- import colorlog
15
- import torch
16
-
17
- fmt_string = '%(log_color)s %(asctime)s - %(levelname)s - %(message)s'
18
- log_colors = {
19
- 'DEBUG': 'white',
20
- 'INFO': 'green',
21
- 'WARNING': 'yellow',
22
- 'ERROR': 'red',
23
- 'CRITICAL': 'purple'
24
- }
25
- colorlog.basicConfig(log_colors=log_colors, format=fmt_string, level=colorlog.INFO)
26
- logger = colorlog.getLogger(__name__)
27
- logger.setLevel(colorlog.INFO)
28
-
29
-
30
-
31
- def get_md_text_abstract(rag_answer, source = ['Arxiv Search', 'Semantic Search'][1], return_prompt_formatting = False):
32
- if 'Semantic Search' in source:
33
- title = rag_answer['document_metadata']['title'].replace('\n','')
34
- #score = round(rag_answer['score'], 2)
35
- date = rag_answer['document_metadata']['_time']
36
- paper_abs = rag_answer['content']
37
- authors = rag_answer['document_metadata']['authors'].replace('\n','')
38
- doc_id = rag_answer['document_id']
39
- paper_link = f'''https://arxiv.org/abs/{doc_id}'''
40
- download_link = f'''https://arxiv.org/pdf/{doc_id}'''
41
-
42
- elif 'Arxiv' in source:
43
- title = rag_answer.title
44
- date = rag_answer.updated.strftime('%d %b %Y')
45
- paper_abs = rag_answer.summary.replace('\n',' ') + '\n'
46
- authors = ', '.join([author.name for author in rag_answer.authors])
47
- paper_link = rag_answer.links[0].href
48
- download_link = rag_answer.links[1].href
49
-
50
- else:
51
-         raise ValueError(f"Unknown source: {source}")
52
-
53
- paper_title = f'''### {date} | [{title}]({paper_link}) | [⬇️]({download_link})\n'''
54
- authors_formatted = f'*{authors}*' + ' \n\n'
55
-
56
- md_text_formatted = paper_title + authors_formatted + paper_abs + '\n---------------\n'+ '\n'
57
- if return_prompt_formatting:
58
- doc = {
59
- 'title': title,
60
- 'text': paper_abs
61
- }
62
- return md_text_formatted, doc
63
-
64
- return md_text_formatted
65
-
66
- def remove_punctuation(text):
67
- punct_str = string.punctuation
68
- punct_str = punct_str.replace("'", "")
69
- return text.translate(str.maketrans("", "", punct_str))
70
-
71
- def remove_stopwords(text):
72
- text = ' '.join(word for word in text.split(' ') if word not in stop_words)
73
- return text
74
-
75
- def search_cleaner(text):
76
- new_text = text.lower()
77
- new_text = remove_stopwords(new_text)
78
- new_text = remove_punctuation(new_text)
79
- return new_text
80
-
81
-
82
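- # Limit live arXiv search to CS categories (vision, ML, NLP, AI, neural/evolutionary computing, robotics).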
- q = '(cat:cs.CV OR cat:cs.LG OR cat:cs.CL OR cat:cs.AI OR cat:cs.NE OR cat:cs.RO)'
83
-
84
-
85
- def get_arxiv_live_search(query, client, max_results = 10):
86
- clean_text = search_cleaner(query)
87
- search = arxiv.Search(
88
- query = clean_text + " AND "+q,
89
- max_results = max_results,
90
- sort_by = arxiv.SortCriterion.Relevance
91
- )
92
- results = client.results(search)
93
- all_results = list(results)
94
- return all_results
95
-
96
-
97
- def make_doc_prompt(doc, doc_id, doc_prompt, use_shorter=None):
98
- # For doc prompt:
99
- # - {ID}: doc id (starting from 1)
100
- # - {T}: title
101
- # - {P}: text
102
- # use_shorter: None, "summary", or "extraction"
103
-
104
- text = doc['text']
105
- if use_shorter is not None:
106
- text = doc[use_shorter]
107
- return doc_prompt.replace("{T}", doc["title"]).replace("{P}", text).replace("{ID}", str(doc_id+1))
108
-
109
-
110
- def get_shorter_text(item, docs, ndoc, key):
111
- doc_list = []
112
- for item_id, item in enumerate(docs):
113
- if key not in item:
114
- if len(doc_list) == 0:
115
-                     # If no documents were kept, provide at least one (using the full text)
116
- item[key] = item['text']
117
- doc_list.append(item)
118
-                 logger.warning(f"No {key} found in document. The data may not contain {key}, or the preceding documents are not relevant. This is document {item_id}. This question will only have {len(doc_list)} documents.")
119
- break
120
- if "irrelevant" in item[key] or "Irrelevant" in item[key]:
121
- continue
122
- doc_list.append(item)
123
- if len(doc_list) >= ndoc:
124
- break
125
- return doc_list
126
-
127
-
128
- def make_demo(item, prompt, ndoc=None, doc_prompt=None, instruction=None, use_shorter=None, test=False):
129
- # For demo prompt
130
- # - {INST}: the instruction
131
- # - {D}: the documents
132
- # - {Q}: the question
133
- # - {A}: the answers
134
- # ndoc: number of documents to put in context
135
- # use_shorter: None, "summary", or "extraction"
136
-
137
- prompt = prompt.replace("{INST}", instruction).replace("{Q}", item['question'])
138
- if "{D}" in prompt:
139
- if ndoc == 0:
140
- prompt = prompt.replace("{D}\n", "") # if there is no doc we also delete the empty line
141
- else:
142
- doc_list = get_shorter_text(item, item["docs"], ndoc, use_shorter) if use_shorter is not None else item["docs"][:ndoc]
143
- text = "".join([make_doc_prompt(doc, doc_id, doc_prompt, use_shorter=use_shorter) for doc_id, doc in enumerate(doc_list)])
144
- prompt = prompt.replace("{D}", text)
145
-
146
- if not test:
147
- answer = "\n" + "\n".join(item["answer"]) if isinstance(item["answer"], list) else item["answer"]
148
- prompt = prompt.replace("{A}", "").rstrip() + answer
149
- else:
150
- prompt = prompt.replace("{A}", "").rstrip() # remove any space or \n
151
-
152
- return prompt
153
-
154
-
155
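- # Load Llama Guard plus the token id of "unsafe", whose probability moderate() reads from the logits.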
- def load_llama_guard(model_id = "meta-llama/Llama-Guard-3-1B"):
156
- from transformers import AutoTokenizer, AutoModelForCausalLM
157
- dtype = torch.bfloat16
158
-
159
- logger.info("loading llama_guard")
160
- llama_guard_tokenizer = AutoTokenizer.from_pretrained(model_id)
161
- llama_guard = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype).to('cuda')
162
-
163
- # Get the id of the "unsafe" token, this will later be used to extract its probability
164
- UNSAFE_TOKEN_ID = llama_guard_tokenizer.convert_tokens_to_ids("unsafe")
165
-
166
- return llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID
167
-
168
-
169
- # @spaces.GPU(duration=120)
170
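- # Run Llama Guard on a chat turn; returns the generated verdict text and the probability of the "unsafe" token.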
- def moderate(chat, model, tokenizer, UNSAFE_TOKEN_ID):
171
-
172
- prompt = tokenizer.apply_chat_template(chat, return_tensors="pt", tokenize=False)
173
- # Skip the generation of whitespace.
174
- # Now the next predicted token will be either "safe" or "unsafe"
175
- prompt += "\n\n"
176
-
177
- inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
178
- outputs = model.generate(
179
- **inputs,
180
- max_new_tokens=50,
181
- return_dict_in_generate=True,
182
- pad_token_id=tokenizer.eos_token_id,
183
- output_logits=True, # get logits
184
- do_sample=False,
185
- temperature=None,
186
- top_p=None
187
- )
188
- ######
189
- # Get generated text
190
- ######
191
- # logger.info(outputs)
192
- # Number of tokens that correspond to the input prompt
193
- input_length = inputs.input_ids.shape[1]
194
- # Ignore the tokens from the input to get the tokens generated by the model
195
- generated_token_ids = outputs.sequences[:, input_length:].cpu()
196
- logger.info(generated_token_ids)
197
- generated_text = tokenizer.decode(generated_token_ids[0], skip_special_tokens=True)
198
- logger.info(generated_text)
199
- ######
200
- # Get Probability of "unsafe" token
201
- ######
202
-
203
- # First generated token is either "safe" or "unsafe".
204
- # use the logits to calculate the probabilities.
205
- first_token_logits = outputs.logits[0]
206
- first_token_probs = torch.softmax(first_token_logits, dim=-1)
207
-
208
- # From the probabilities of all tokens, extract the one for the "unsafe" token.
209
- unsafe_probability = first_token_probs[0, UNSAFE_TOKEN_ID]
210
- unsafe_probability = unsafe_probability.item()
211
-
212
- ########
213
- # Result
214
- ########
215
- return {
216
- "unsafe_score": unsafe_probability,
217
- "generated_text": generated_text
218
- }
219
-
220
-
221
-
222
- def get_max_memory():
223
- """Get the maximum memory available for the current GPU for loading models."""
224
- free_in_GB = int(torch.cuda.mem_get_info()[0]/1024**3)
225
- max_memory = f'{free_in_GB-1}GB'
226
- n_gpus = torch.cuda.device_count()
227
- max_memory = {i: max_memory for i in range(n_gpus)}
228
- return max_memory
229
-
230
-
231
- def load_model(model_name_or_path, dtype=torch.bfloat16, int8=False):
232
- # Load a huggingface model and tokenizer
233
- # dtype: torch.float16 or torch.bfloat16
234
- # int8: whether to use int8 quantization
235
- # reserve_memory: how much memory to reserve for the model on each gpu (in GB)
236
-
237
- # Load the FP16 model
238
- from transformers import AutoModelForCausalLM, AutoTokenizer
239
- logger.info(f"Loading {model_name_or_path} in {dtype}...")
240
- if int8:
241
-         logger.warning("Use LLM.int8")
242
- start_time = time.time()
243
- model = AutoModelForCausalLM.from_pretrained(
244
- model_name_or_path,
245
- device_map='auto',
246
- torch_dtype=dtype,
247
- max_memory=get_max_memory(),
248
- load_in_8bit=int8,
249
- )
250
- logger.info("Finish loading in %.2f sec." % (time.time() - start_time))
251
-
252
- # Load the tokenizer
253
- tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
254
-
255
- tokenizer.padding_side = "left"
256
-
257
- return model, tokenizer
258
-
259
-
260
- def load_vllm(model_name_or_path, dtype=torch.bfloat16):
261
- from vllm import LLM, SamplingParams
262
- logger.info(f"Loading {model_name_or_path} in {dtype}...")
263
- start_time = time.time()
264
- model = LLM(
265
- model_name_or_path,
266
- dtype=dtype,
267
- gpu_memory_utilization=0.9,
268
- max_seq_len_to_capture=2048,
269
- max_model_len=8192,
270
- )
271
- sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=300)
272
- logger.info("Finish loading in %.2f sec." % (time.time() - start_time))
273
-
274
- # Load the tokenizer
275
- tokenizer = model.get_tokenizer()
276
-
277
- tokenizer.padding_side = "left"
278
-
279
- return model, tokenizer, sampling_params
280
-
281
-
282
-
283
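- # Thin wrapper exposing a single generate() API over either a vLLM engine or a plain transformers model.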
- class LLM:
284
-
285
- def __init__(self, model_name_or_path, use_vllm=True):
286
- self.use_vllm = use_vllm
287
- if use_vllm:
288
- self.chat_llm, self.tokenizer, self.sampling_params = load_vllm(model_name_or_path)
289
- else:
290
- self.chat_llm, self.tokenizer = load_model(model_name_or_path)
291
-
292
- self.prompt_exceed_max_length = 0
293
- self.fewer_than_50 = 0
294
-
295
- def generate(self, prompt, max_tokens=300, stop=None):
296
- if max_tokens <= 0:
297
- self.prompt_exceed_max_length += 1
298
-             logger.warning("Prompt exceeds the max length; returning an empty string as the answer. If this happens often, consider shortening the prompt.")
299
- return ""
300
- if max_tokens < 50:
301
- self.fewer_than_50 += 1
302
-             logger.warning("The model can generate fewer than 50 tokens. If this happens often, consider shortening the prompt.")
303
-
304
- if self.use_vllm:
305
- inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], add_generation_prompt=True, tokenize=False)
306
- self.sampling_params.n = 1 # Number of output sequences to return for the given prompt
307
- self.sampling_params.stop_token_ids = [self.chat_llm.llm_engine.get_model_config().hf_config.eos_token_id]
308
- self.sampling_params.max_tokens = max_tokens
309
- output = self.chat_llm.generate(
310
- inputs,
311
- self.sampling_params,
312
- use_tqdm=True,
313
- )
314
- generation = output[0].outputs[0].text.strip()
315
-
316
- else:
317
- inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], add_generation_prompt=True, return_dict=True, return_tensors="pt").to(self.chat_llm.device)
318
- outputs = self.chat_llm.generate(
319
- **inputs,
320
- do_sample=True, temperature=0.1, top_p=0.95,
321
- max_new_tokens=max_tokens,
322
- num_return_sequences=1,
323
- eos_token_id=[self.chat_llm.config.eos_token_id]
324
- )
325
- generation = self.tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True).strip()
326
-
327
- return generation