OrangeEye committed
Commit a5d6d73 · 0 Parent(s)

update Trust-Align

.gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ colbert/indexes/arxiv_colbert/collection.json filter=lfs diff=lfs merge=lfs -text
+ colbert/indexes/arxiv_colbert/docid_metadata_map.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Trust-Align
+ emoji: 🔥
+ colorFrom: blue
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 4.37.2
+ app_file: app.py
+ pinned: false
+ short_description: Measuring and Enhancing Trustworthiness of LLMs in RAG through Grounded Attributions and Learning to Refuse
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Index Last Updated : 2024-11-16
__pycache__/utils.cpython-310.pyc ADDED
Binary file (8.96 kB).
app.py ADDED
@@ -0,0 +1,246 @@
+ # import os
+ # # Set CUDA device dynamically
+ # os.environ["CUDA_VISIBLE_DEVICES"] = "5"
+
+ import spaces
+ import torch
+ import transformers
+ import gradio as gr
+ from ragatouille import RAGPretrainedModel
+ from huggingface_hub import InferenceClient
+ import re
+ from datetime import datetime
+ import json
+ import os
+ import arxiv
+ from utils import get_md_text_abstract, search_cleaner, get_arxiv_live_search, make_demo, make_doc_prompt, load_llama_guard, moderate, LLM
+
+ global MODEL, CURRENT_MODEL
+ MODEL, CURRENT_MODEL = None, None
+
+ retrieve_results = 10
+ show_examples = True
+ llm_models_to_choose = ['Trust-Align-Qwen2.5', "meta-llama/Meta-Llama-3-8B-Instruct",'None']
+ llm_location_map={
+ "Trust-Align-Qwen2.5": os.getenv("MODEL_NAME"),
+ "meta-llama/Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct", # "Qwen/Qwen2.5-7B-Instruct"
+ "None": None
+ }
+
+ generate_kwargs = dict(
+ temperature = 0.1,
+ max_new_tokens = 512,
+ top_p = 1.0,
+ do_sample = True,
+ )
+
+ # Load llama Guard
+ llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID = load_llama_guard("meta-llama/Llama-Guard-3-1B")
+
+ ## RAG MODEL
+ RAG = RAGPretrainedModel.from_index("colbert/indexes/arxiv_colbert")
+
+ try:
+ gr.Info("Setting up retriever, please wait...")
+ rag_initial_output = RAG.search("what is Mistral?", k = 1)
+ gr.Info("Retriever working successfully!")
+
+ except:
+ gr.Warning("Retriever not working!")
+
+ def choose_llm(choosed_llm):
+ global MODEL, CURRENT_MODEL
+ try:
+ gr.Info("Setting up LLM, please wait...")
+ MODEL = LLM(llm_location_map[choosed_llm], use_vllm=False)
+ CURRENT_MODEL = choosed_llm
+ gr.Info("LLM working successfully!")
+ except Exception as e:
+ raise RuntimeError("Failed to load the LLM MODEL.") from e
+
+ choose_llm(llm_models_to_choose[0])
+
+ # prompt used for generation
+ try:
+ with open("rejection_full.json") as f:
+ prompt_data = json.load(f)
+ except FileNotFoundError:
+ raise RuntimeError("Prompt data file 'rejection_full.json' not found.")
+ except json.JSONDecodeError:
+ raise RuntimeError("Failed to decode 'rejection_full.json'.")
+
+ ## Header
+ mark_text = '# 🔍 Search Results\n'
+ header_text = "# 🤖 Trust-Align: Measuring and Enhancing Trustworthiness of LLMs in RAG through Grounded Attributions and Learning to Refuse\n \n"
+
+ try:
+ with open("README.md", "r") as f:
+ mdfile = f.read()
+ date_pattern = r'Index Last Updated : \d{4}-\d{2}-\d{2}'
+ match = re.search(date_pattern, mdfile)
+ date = match.group().split(': ')[1]
+ formatted_date = datetime.strptime(date, '%Y-%m-%d').strftime('%d %b %Y')
+ header_text += f'Index Last Updated: {formatted_date}\n'
+ index_info = f"Semantic Search - up to {formatted_date}"
+ except:
+ index_info = "Semantic Search"
+
+ database_choices = [index_info,'Arxiv Search - Latest - (EXPERIMENTAL)']
+
+ ## Arxiv API
+ arx_client = arxiv.Client()
+ is_arxiv_available = True
+ check_arxiv_result = get_arxiv_live_search("What is Mistral?", arx_client, retrieve_results)
+ if len(check_arxiv_result) == 0:
+ is_arxiv_available = False
+ print("Arxiv search not working, switching to default search ...")
+ database_choices = [index_info]
+
+
+
+ ## Show examples
+ if show_examples:
+ with open("sample_outputs.json", "r") as f:
+ sample_outputs = json.load(f)
+ output_placeholder = sample_outputs['output_placeholder']
+ md_text_initial = sample_outputs['search_placeholder']
+
+ else:
+ output_placeholder = None
+ md_text_initial = ''
+
+
+ def rag_cleaner(inp):
+ rank = inp['rank']
+ title = inp['document_metadata']['title']
+ content = inp['content']
+ date = inp['document_metadata']['_time']
+ return f"{rank}. <b> {title} </b> \n Date : {date} \n Abstract: {content}"
+
+ def get_references(question, retriever, k = retrieve_results):
+ rag_out = retriever.search(query=question, k=k)
+ return rag_out
+
+ def get_rag(message):
+ return get_references(message, RAG)
+
+ with gr.Blocks(theme = gr.themes.Soft()) as demo:
+ header = gr.Markdown(header_text)
+
+ with gr.Group():
+ msg = gr.Textbox(label = 'Search', placeholder = 'What is Mistral?')
+
+ with gr.Accordion("Advanced Settings", open=False):
+ with gr.Row(equal_height = True):
+ llm_model = gr.Dropdown(choices = llm_models_to_choose, value = 'Trust-Align-Qwen2.5', label = 'LLM MODEL')
+ llm_results = gr.Slider(minimum=1, maximum=retrieve_results, value=3, step=1, interactive=True, label="Top n results as context")
+ database_src = gr.Dropdown(choices = database_choices, value = index_info, label = 'Search Source')
+ stream_results = gr.Checkbox(value = True, label = "Stream output", visible = False)
+
+ output_text = gr.Textbox(show_label = True, container = True, label = 'LLM Answer', visible = True, placeholder = output_placeholder)
+ input = gr.Textbox(visible=False) # placeholder
+ gr_md = gr.Markdown(mark_text + md_text_initial)
+
+ def update_with_rag_md(message, llm_results_use = 5, database_choice = index_info, llm_model_picked = 'Trust-Align-Qwen2.5'):
+ chat_round = [
+ {"role": "user",
+ "content": [
+ {"type": "text",
+ "text": message
+ }
+ ]
+ }
+ ]
+ # llama guard check for it
+ prompt_safety = moderate(chat_round, llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID)['generated_text']
+
+ if prompt_safety == "safe":
+ docs = []
+ database_to_use = database_choice
+ if database_choice == index_info:
+ rag_out = get_rag(message)
+ else:
+ arxiv_search_success = True
+ try:
+ rag_out = get_arxiv_live_search(message, arx_client, retrieve_results)
+ if len(rag_out) == 0:
+ arxiv_search_success = False
+ except:
+ arxiv_search_success = False
+
+ if not arxiv_search_success:
+ gr.Warning("Arxiv Search not working, switching to semantic search ...")
+ rag_out = get_rag(message)
+ database_to_use = index_info
+
+ md_text_updated = mark_text
+ for i in range(retrieve_results):
+ rag_answer = rag_out[i]
+ if i < llm_results_use:
+ md_text_paper, doc = get_md_text_abstract(rag_answer, source = database_to_use, return_prompt_formatting = True)
+ docs.append(doc)
+ md_text_paper = md_text_paper.strip("###")
+ md_text_updated += f"### [{i+1}] {md_text_paper}"
+ # else:
+ # md_text_paper = get_md_text_abstract(rag_answer, source = database_to_use)
+ # md_text_updated += md_text_paper
+
+ infer_item = {
+ "question": message,
+ "docs": docs,
+ }
+ prompt = make_demo(
+ infer_item,
+ prompt=prompt_data["demo_prompt"],
+ ndoc=llm_results_use,
+ doc_prompt=prompt_data["doc_prompt"],
+ instruction=prompt_data["instruction"],
+ test=True
+ )
+ else:
+ md_text_updated = mark_text + "### Invalid search query!"
+ prompt = ""
+
+ return md_text_updated, prompt
+
+
+ @spaces.GPU(duration=60)
+ def ask_llm(prompt, llm_model_picked = 'Trust-Align-Qwen2.5', stream_outputs = False):
+ model_disabled_text = "LLM MODEL is disabled"
+ output = ""
+
+ if llm_model_picked == 'None':
+ if stream_outputs:
+ for out in model_disabled_text:
+ output += out
+ yield output
+ return output
+ else:
+ return model_disabled_text
+
+ global MODEL
+ if llm_model_picked != CURRENT_MODEL:
+ del MODEL
+ import gc
+ gc.collect()
+ torch.cuda.empty_cache()
+ choose_llm(llm_model_picked)
+
+ try:
+ stream = MODEL.generate(prompt, generate_kwargs["max_new_tokens"])
+ except:
+ gr.Warning("LLM Inference rate limit reached, try again later!")
+ return ""
+
+ if stream_outputs:
+ for response in stream:
+ output += response
+ yield output
+ return output
+ else:
+ return stream
+
+
+ msg.submit(update_with_rag_md, [msg, llm_results, database_src, llm_model], [gr_md, input]).success(ask_llm, [input, llm_model, stream_results], output_text)
+
+ demo.queue().launch()
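For orientation, a minimal sketch (not part of the commit) of how the two callbacks wired to `msg.submit` chain together, assuming the Space above is already running with its index and models loaded:

```python
# Hypothetical local driver reusing the functions defined in app.py above:
# update_with_rag_md retrieves papers, runs the Llama Guard check, and builds
# the grounded prompt; ask_llm then streams the cited answer.
search_md, prompt = update_with_rag_md(
    "What is Mistral?",
    llm_results_use=3,
    database_choice=index_info,
    llm_model_picked="Trust-Align-Qwen2.5",
)

if prompt:  # an empty prompt means Llama Guard flagged the query
    answer = ""
    for partial in ask_llm(prompt, "Trust-Align-Qwen2.5", stream_outputs=True):
        answer = partial  # each yield is the accumulated answer so far
    print(answer)
else:
    print(search_md)  # contains "Invalid search query!"
```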
rejection_full.json ADDED
@@ -0,0 +1,184 @@
+ {
+ "instruction": "Instruction: Write an accurate, engaging, and concise answer for the given question using only the provided search results (some of which might be irrelevant) and cite them properly. Use an unbiased and journalistic tone. Always cite for any factual claim. When citing several search results, use [1][2][3]. Cite at least one document and at most three documents in each sentence. If multiple documents support the sentence, only cite a minimum sufficient subset of the documents. If none of the provided documents contain the answer, only respond with \"I apologize, but I couldn't find an answer to your question in the search results.\". Then, add further explanation as to why an answer cannot be provided.",
+ "demo_sep": "\n\n\n",
+ "demo_prompt": "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}",
+ "doc_prompt": "Document [{ID}](Title: {T}): {P}\n",
+ "positive_demos": [
+ {
+ "question": "Which is the most rainy place on earth?",
+ "answer": "Several places on Earth claim to be the most rainy, such as Lloró, Colombia, which reported an average annual rainfall of 12,717 mm between 1952 and 1989, and López de Micay, Colombia, which reported an annual 12,892 mm between 1960 and 2012 [3]. However, the official record is held by Mawsynram, India with an average annual rainfall of 11,872 mm [3], although nearby town Sohra, India, also known as Cherrapunji, holds the record for most rain in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861 [1].",
+ "docs": [
+ {
+ "title": "Cherrapunji",
+ "text": "Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall in a calendar month for July 1861 and most rain in a year from August 1860 to July 1861, however: it received in"
+ },
+ {
+ "title": "Cherrapunji",
+ "text": "Radio relay station known as Akashvani Cherrapunji. It broadcasts on FM frequencies. Cherrapunji Cherrapunji (; with the native name Sohra being more commonly used, and can also be spelled Cherrapunjee or Cherrapunji) is a subdivisional town in the East Khasi Hills district in the Indian state of Meghalaya. It is the traditional capital of aNongkhlaw \"hima\" (Khasi tribal chieftainship constituting a petty state), both known as Sohra or Churra. Cherrapunji has often been credited as being the wettest place on Earth, but for now nearby Mawsynram currently holds that distinction. Cherrapunji still holds the all-time record for the most rainfall"
+ },
+ {
+ "title": "Mawsynram",
+ "text": "Mawsynram Mawsynram () is a village in the East Khasi Hills district of Meghalaya state in north-eastern India, 65 kilometres from Shillong. Mawsynram receives one of the highest rainfalls in India. It is reportedly the wettest place on Earth, with an average annual rainfall of 11,872 mm, but that claim is disputed by Lloró, Colombia, which reported an average yearly rainfall of 12,717 mm between 1952 and 1989 and López de Micay, also in Colombia, which reported an annual 12,892 mm per year between 1960 and 2012. According to the \"Guinness Book of World Records\", Mawsynram received of rainfall in 1985. Mawsynram is located at 25° 18′"
+ },
+ {
+ "title": "Earth rainfall climatology",
+ "text": "Pacific Northwest, and the Sierra Nevada range are the wetter portions of the nation, with average rainfall exceeding per year. The drier areas are the Desert Southwest, Great Basin, valleys of northeast Arizona, eastern Utah, central Wyoming, eastern Oregon and Washington and the northeast of the Olympic Peninsula. The Big Bog on the island of Maui receives, on average, every year, making it the wettest location in the US, and all of Oceania. The annual average rainfall maxima across the continent lie across the northwest from northwest Brazil into northern Peru, Colombia, and Ecuador, then along the Atlantic coast of"
+ },
+ {
+ "title": "Going to Extremes",
+ "text": "in the world. Oymyakon in Siberia, where the average winter temperature is −47 °F (− 44 °C). Arica in Chile, where there had been fourteen consecutive years without rain. Fog is the only local source of water. Mawsynram in India, where average annual rainfall is 14 meters, falling within a four-month period in the monsoon season. The rainfall is approximately equal to that of its neighbor Cherrapunji. Dallol in Ethiopia, known as the 'Hell-hole of creation' where the temperature averages 94 °F (34 °C) over the year. In his second series, Middleton visited places without permanent towns, locations where \"survival\""
+ }
+ ]
+ },
+ {
+ "question": "When did the us break away from england?",
+ "answer": "The United States took the first step towards gaining independence from Great Britain when it declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, 1776, the date when the Declaration of Independence was officially adopted by Congress) [2]. The Treaty of Paris was later signed on September 3, 1783, formally separating the United States from the British Empire [3].",
+ "docs": [
+ {
+ "title": "United States withdrawal from Saudi Arabia",
+ "text": "United States withdrawal from Saudi Arabia Beginning during Operation Desert Shield in August 1990, while preparing for the Gulf War, the United States sent a large troop contingent to Saudi Arabia. After the war, remnant troops, primarily U.S. Air Force personnel, augmented by a smaller number of coordinating and training personnel from the U.S. Navy, U.S. Army and U.S. Marine Corps remained in Saudi Arabia under the aegis of Joint Task Force Southwest Asia (JTF-SWA), as part of Operation Southern Watch (OSW). The United Kingdom and France also maintained a small contingent of Royal Air Force and French Air Force"
+ },
+ {
+ "title": "Decolonization of the Americas",
+ "text": "and France has fully \"integrated\" most of its former colonies as fully constituent \"departments\" of France. The United States of America declared independence from Great Britain on July 2, 1776 (although the event is now commemorated on July 4, the date when the Declaration of Independence was officially adopted by Congress), in so doing becoming the first independent, foreign-recognized nation in the Americas and the first European colonial entity to break from its mother country. Britain formally acknowledged American independence in 1783 after its defeat in the American Revolutionary War. Although initially occupying only the land east of the Mississippi"
+ },
+ {
+ "title": "American Revolution",
+ "text": "second British army at Yorktown in the fall of 1781, effectively ending the war. The Treaty of Paris was signed September 3, 1783, formally ending the conflict and confirming the new nation's complete separation from the British Empire. The United States took possession of nearly all the territory east of the Mississippi River and south of the Great Lakes, with the British retaining control of Canada and Spain taking Florida. Among the significant results of the revolution was the creation of the United States Constitution, establishing a relatively strong federal national government that included an executive, a national judiciary, and"
+ },
+ {
+ "title": "Decolonization",
+ "text": "accelerate decolonialization and bring an end to the colonial empires of its Western allies, most importantly during the 1956 Suez Crisis, but American military bases were established around the world and direct and indirect interventions continued in Korea, Indochina, Latin America (\"inter alia\", the 1965 occupation of the Dominican Republic), Africa, and the Middle East to oppose Communist invasions and insurgencies. Since the dissolution of the Soviet Union, the United States has been far less active in the Americas, but invaded Afghanistan and Iraq following the September 11 attacks in 2001, establishing army and air bases in Central Asia. Before"
+ },
+ {
+ "title": "Decolonization",
+ "text": "the responsibility of the United Kingdom (with a copy of the new constitution annexed), and finally, if approved, issuance of an Order of Council fixing the exact date of independence. After World War I, several former German and Ottoman territories in the Middle East, Africa, and the Pacific were governed by the UK as League of Nations mandates. Some were administered directly by the UK, and others by British dominions – Nauru and the Territory of New Guinea by Australia, South West Africa by the Union of South Africa, and Western Samoa by New Zealand. Egypt became independent in 1922,"
+ }
+ ]
+ }
+ ],
+ "reject_demos": [
+ {
+ "question": "Who set the record for longest field goal?",
+ "answer": "I apologize, but I couldn't find an answer to your question in the search results.",
+ "docs": [
+ {
+ "id": "5758609",
+ "title": "Sebastian Janikowski",
+ "text": "have broken the all-time NFL field goal record of 63 yards. However, it bounced off the right upright and came back out. On September 28, 2008, Janikowski unsuccessfully attempted a 76-yard field goal against the San Diego Chargers into the heavy wind right before halftime. This is presumed to be the longest attempt in NFL history; though the league keeps no such records on attempts, the longest known attempts previous to this were 74 yard attempts by Mark Moseley and Joe Danelo in 1979. On October 19, 2008, Janikowski broke his own Raiders team record, making a 57-yard field goal",
+ "score": 0.78466796875,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "12183799",
+ "title": "Dirk Borgognone",
+ "text": "Dirk Borgognone Dirk Ronald Borgognone (born January 9, 1968) is a former National Football League placekicker who currently holds the record for the longest field goal ever kicked in the history of high school football, 68 yards. Borgognone attended Reno High School, initially playing as a soccer player. He soon switched to football and was trained in a \"straight-on\" kicking style. On September 27, 1985, he kicked the longest field goal in high school football history, during a Reno High School game at Sparks High School. The kick measured 68 yards and was longer than any that had ever been",
+ "score": 0.7822265625,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "6048593",
+ "title": "Russell Erxleben",
+ "text": "against the University of Colorado. Despite early troubles in the game, including a missed field goal and a blocked extra point, Erxleben kicked a field goal late in the game, breaking the tie and securing the win for Texas. In 1977, in a game against Rice University, he set the record for the longest field goal in NCAA history with a 67-yard kick. UT head coach Fred Akers said of the kick, \"It was like a gunshot. We couldn't believe a ball was going that far. It had another eight yards on it.\" Erxleben kicked two other field goals over",
+ "score": 0.7705078125,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "9303115",
+ "title": "Field goal",
+ "text": "to Dempsey's 1970 kick, the longest field goal in NFL history was 56 yards, by Bert Rechichar of the Baltimore Colts A 55-yard field goal, achieved by a drop kick, was recorded by Paddy Driscoll in , and stood as the unofficial record until that point; some sources indicate a 54-yarder by Glenn Presnell in as the due to the inability to precisely verify Driscoll's kick. In a pre-season NFL game between the Denver Broncos and the Seattle Seahawks on August 29, 2002, Ola Kimrin kicked a 65-yard field goal. However, because pre-season games are not counted toward official records,",
+ "score": 0.76513671875,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "4853018",
+ "title": "Steve Christie",
+ "text": "Canton. He then went on to kick five field goals in the Bills 29-10 win over the Miami Dolphins in the AFC championship game, helping Buffalo get to their third consecutive Super Bowl. In 1993, Christie set a Bills record by kicking a 59-yard field goal in a regular season game. It was only four yards short of the all-time NFL record. In Super Bowl XXVIII, Christie set a Super Bowl record by kicking a 54-yard field goal. It is currently the longest field goal ever made in Super Bowl History. In the 2000 season, Christie was an instrumental part",
+ "score": 0.75341796875,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ }
+ ]
+ },
+ {
+ "question": "Who played galen in planet of the apes?",
+ "answer": "I apologize, but I couldn't find an answer to your question in the search results.",
+ "docs": [
+ {
+ "id": "12677620",
+ "title": "Planet of the Apes (2001 film)",
+ "text": "be the Lincoln Memorial, only to find that it is now a monument to General Thade. A swarm of police officers, firefighters, and news reporters descend on Leo, all of whom are apes. Small roles include David Warner (Senator Sandar), Lisa Marie (Nova), Erick Avari (Tival), Luke Eberl (Birn), Evan Parke (Gunnar), Glenn Shadix (Senator Nado), Freda Foh Shen (Bon), Chris Ellis (Lt. Gen. Karl Vasich) and Anne Ramsay (Lt. Col. Grace Alexander). There are also cameo appearances by Charlton Heston (uncredited) as Zaius, Thade's father, and Linda Harrison (the woman in the cart). Both participated in two original films",
+ "score": 0.7529296875,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "3943319",
+ "title": "Severn Darden",
+ "text": "Severn Darden Severn Teakle Darden Jr. (November 9, 1929 \u2013 May 27, 1995) was an American comedian and actor, and an original member of The Second City Chicago-based comedy troupe as well as its predecessor, the Compass Players. He is perhaps best known from his film appearances for playing the human leader Kolp in the fourth and fifth \"Planet of the Apes\" films. Born in New Orleans, Louisiana, he attended the University of Chicago. Darden\u2019s offbeat and intellectual sense of humor, appropriate for someone who attended the University of Chicago and in fact a major element in the style of",
+ "score": 0.74267578125,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "13813715",
+ "title": "Planet of the Apes",
+ "text": "film stars Mark Wahlberg as astronaut Leo Davidson, who accidentally travels through a wormhole to a distant planet where talking apes enslave humans. He leads a human revolt and upends ape civilization by discovering that the apes evolved from the normal earth primates who had accompanied his mission, and arrived years before. Helena Bonham Carter played chimpanzee Ari, while Tim Roth played the human-hating chimpanzee General Thade. The film received mixed reviews; most critics believed it failed to compare to the original. Much of the negative commentary focused on the confusing plot and twist ending, though many reviewers praised the",
+ "score": 0.74169921875,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "3386349",
+ "title": "Maurice Evans (actor)",
+ "text": "Maurice Evans (actor) Maurice Herbert Evans (3 June 1901 \u2013 12 March 1989) was a British actor, noted for his interpretations of Shakespearean characters. His best-known screen roles are Dr. Zaius in the 1968 film \"Planet of the Apes\" and as Samantha Stephens's father, Maurice, on \"Bewitched\". Evans was born at 28 Icen Way (where there is now a memorial plaque, unveiled in 2013 by Tegen Evans, his great-great niece) in Dorchester, Dorset. He was the son of Laura (Turner) and Alfred Herbert Evans, a Welsh dispensing chemist and keen amateur actor who made adaptations of novels by Thomas Hardy",
+ "score": 0.734375,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ },
+ {
+ "id": "823444",
+ "title": "Ricardo Montalba\u0301n",
+ "text": "was played by Andy Garc\u00eda. Ricardo Montalb\u00e1n Ricardo Gonzalo Pedro Montalb\u00e1n y Merino, (; ; November 25, 1920 \u2013 January 14, 2009) was a Mexican actor. His career spanned seven decades, during which he became known for many different performances in a variety of genres, from crime and drama to musicals and comedy. Among his notable roles was Armando in the \"Planet of the Apes\" film series from the early 1970s, where he starred in \"Escape from the Planet of the Apes\" (1971) and \"Conquest of the Planet of the Apes\" (1972). Ricardo Montalb\u00e1n played Mr. Roarke on the television",
+ "score": 0.7314453125,
+ "answers_found": [
+ 0,
+ 0
+ ],
+ "rec_score": 0.0
+ }
+ ]
+ }
+ ]
+ }
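The fields above are consumed by `make_demo` in `utils.py`: `{INST}`, `{Q}`, `{D}`, and `{A}` in `demo_prompt` are filled from `instruction`, the user question, the documents rendered through `doc_prompt`, and the answer. A small sketch (not part of the commit; the question and document are made up) of what that assembly looks like at inference time:

```python
import json
from utils import make_demo

with open("rejection_full.json") as f:
    prompt_data = json.load(f)

# Hypothetical question/document pair, shaped like the `infer_item` built in app.py.
item = {
    "question": "What is Mistral 7B?",
    "docs": [{"title": "Mistral 7B", "text": "We introduce Mistral 7B v0.1, a 7-billion-parameter language model ..."}],
}

prompt = make_demo(
    item,
    prompt=prompt_data["demo_prompt"],      # "{INST}\n\nQuestion: {Q}\n\n{D}\nAnswer: {A}"
    ndoc=1,
    doc_prompt=prompt_data["doc_prompt"],   # "Document [{ID}](Title: {T}): {P}\n"
    instruction=prompt_data["instruction"],
    test=True,                              # inference mode: the answer slot is left empty
)
print(prompt)
```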
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ gradio==5.5.0
+ spaces==0.30.3
+ PyMuPDF==1.24.14
+ llama-index==0.12.1
+ llama-index-vector-stores-faiss==0.3.0
+ chromadb==0.5.20
+ llama-index-vector-stores-chroma==0.4.0
+ llama-index-embeddings-huggingface==0.4.0
+ vllm==0.6.2
+ sentence-transformers==2.7.0
+ arxiv
+ ragatouille
+ hf_transfer
+ colorlog
+ accelerate==1.1.1
sample_outputs.json ADDED
@@ -0,0 +1 @@
+ {"search_placeholder": "### 10 Oct 2023 | [Mistral 7B](https://arxiv.org/abs/2310.06825) | [\u2b07\ufe0f](https://arxiv.org/pdf/2310.06825)\n*Albert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, L'elio Renard Lavaud, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timoth'ee Lacroix, William El Sayed* \n\nWe introduce Mistral 7B v0.1, a 7-billion-parameter language model engineered\nfor superior performance and efficiency. Mistral 7B outperforms Llama 2 13B\nacross all evaluated benchmarks, and Llama 1 34B in reasoning, mathematics, and\ncode generation. Our model leverages grouped-query attention (GQA) for faster\ninference, coupled with sliding window attention (SWA) to effectively handle\nsequences of arbitrary length with a reduced inference cost. We also provide a\nmodel fine-tuned to follow instructions, Mistral 7B -- Instruct, that surpasses\nthe Llama 2 13B -- Chat model both on human and automated benchmarks. Our\nmodels are released under the Apache 2.0 license.\n", "output_placeholder": "Mistral is a 7-billion-parameter language model engineered for superior performance and efficiency. It was introduced in the paper \"Mistral 7B: A Superior Large Language Model\" [1]. Mistral outperforms other language models like Llama 2 13B and Llama 1 34B in various benchmarks, including reasoning, mathematics, and code generation. The model uses grouped-query attention (GQA) and sliding window attention (SWA) for faster inference and handling sequences of arbitrary length with reduced inference cost. Additionally, a fine-tuned version of Mistral, Mistral 7B -- Instruct, was released, which surpasses Llama 2 13B -- Chat model on human and automated benchmarks [1]. \n[1] Mistral 7B: A Superior Large Language Model. (2023). Retrieved from https://arxiv.org/abs/2303.14311."}
utils.py ADDED
@@ -0,0 +1,321 @@
+ import datetime
+ import string
+
+ import nltk
+
+ nltk.download('stopwords')
+ from nltk.corpus import stopwords
+
+ stop_words = stopwords.words('english')
+ import time
+
+ import arxiv
+ import colorlog
+ import torch
+
+ fmt_string = '%(log_color)s %(asctime)s - %(levelname)s - %(message)s'
+ log_colors = {
+ 'DEBUG': 'white',
+ 'INFO': 'green',
+ 'WARNING': 'yellow',
+ 'ERROR': 'red',
+ 'CRITICAL': 'purple'
+ }
+ colorlog.basicConfig(log_colors=log_colors, format=fmt_string, level=colorlog.INFO)
+ logger = colorlog.getLogger(__name__)
+ logger.setLevel(colorlog.INFO)
+
+
+
+ def get_md_text_abstract(rag_answer, source = ['Arxiv Search', 'Semantic Search'][1], return_prompt_formatting = False):
+ if 'Semantic Search' in source:
+ title = rag_answer['document_metadata']['title'].replace('\n','')
+ #score = round(rag_answer['score'], 2)
+ date = rag_answer['document_metadata']['_time']
+ paper_abs = rag_answer['content']
+ authors = rag_answer['document_metadata']['authors'].replace('\n','')
+ doc_id = rag_answer['document_id']
+ paper_link = f'''https://arxiv.org/abs/{doc_id}'''
+ download_link = f'''https://arxiv.org/pdf/{doc_id}'''
+
+ elif 'Arxiv' in source:
+ title = rag_answer.title
+ date = rag_answer.updated.strftime('%d %b %Y')
+ paper_abs = rag_answer.summary.replace('\n',' ') + '\n'
+ authors = ', '.join([author.name for author in rag_answer.authors])
+ paper_link = rag_answer.links[0].href
+ download_link = rag_answer.links[1].href
+
+ else:
+ raise Exception
+
+ paper_title = f'''### {date} | [{title}]({paper_link}) | [⬇️]({download_link})\n'''
+ authors_formatted = f'*{authors}*' + ' \n\n'
+
+ md_text_formatted = paper_title + authors_formatted + paper_abs + '\n---------------\n'+ '\n'
+ if return_prompt_formatting:
+ doc = {
+ 'title': title,
+ 'text': paper_abs
+ }
+ return md_text_formatted, doc
+
+ return md_text_formatted
+
+ def remove_punctuation(text):
+ punct_str = string.punctuation
+ punct_str = punct_str.replace("'", "")
+ return text.translate(str.maketrans("", "", punct_str))
+
+ def remove_stopwords(text):
+ text = ' '.join(word for word in text.split(' ') if word not in stop_words)
+ return text
+
+ def search_cleaner(text):
+ new_text = text.lower()
+ new_text = remove_stopwords(new_text)
+ new_text = remove_punctuation(new_text)
+ return new_text
+
+
+ q = '(cat:cs.CV OR cat:cs.LG OR cat:cs.CL OR cat:cs.AI OR cat:cs.NE OR cat:cs.RO)'
+
+
+ def get_arxiv_live_search(query, client, max_results = 10):
+ clean_text = search_cleaner(query)
+ search = arxiv.Search(
+ query = clean_text + " AND "+q,
+ max_results = max_results,
+ sort_by = arxiv.SortCriterion.Relevance
+ )
+ results = client.results(search)
+ all_results = list(results)
+ return all_results
+
+
+ def make_doc_prompt(doc, doc_id, doc_prompt, use_shorter=None):
+ # For doc prompt:
+ # - {ID}: doc id (starting from 1)
+ # - {T}: title
+ # - {P}: text
+ # use_shorter: None, "summary", or "extraction"
+
+ text = doc['text']
+ if use_shorter is not None:
+ text = doc[use_shorter]
+ return doc_prompt.replace("{T}", doc["title"]).replace("{P}", text).replace("{ID}", str(doc_id+1))
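A quick illustration (editor's sketch, not part of the file) of the placeholder substitution described in the comment above, using the `doc_prompt` template from `rejection_full.json` and a made-up document:

```python
from utils import make_doc_prompt

# Hypothetical document; doc_id is 0-based, so the rendered ID becomes 1.
doc = {"title": "Mistral 7B", "text": "We introduce Mistral 7B v0.1 ..."}
print(make_doc_prompt(doc, 0, "Document [{ID}](Title: {T}): {P}\n"))
# -> Document [1](Title: Mistral 7B): We introduce Mistral 7B v0.1 ...
```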
+
+
+ def get_shorter_text(item, docs, ndoc, key):
+ doc_list = []
+ for item_id, item in enumerate(docs):
+ if key not in item:
+ if len(doc_list) == 0:
+ # If there aren't any document, at least provide one (using full text)
+ item[key] = item['text']
+ doc_list.append(item)
+ logger.warn(f"No {key} found in document. It could be this data do not contain {key} or previous documents are not relevant. This is document {item_id}. This question will only have {len(doc_list)} documents.")
+ break
+ if "irrelevant" in item[key] or "Irrelevant" in item[key]:
+ continue
+ doc_list.append(item)
+ if len(doc_list) >= ndoc:
+ break
+ return doc_list
+
+
+ def make_demo(item, prompt, ndoc=None, doc_prompt=None, instruction=None, use_shorter=None, test=False):
+ # For demo prompt
+ # - {INST}: the instruction
+ # - {D}: the documents
+ # - {Q}: the question
+ # - {A}: the answers
+ # ndoc: number of documents to put in context
+ # use_shorter: None, "summary", or "extraction"
+
+ prompt = prompt.replace("{INST}", instruction).replace("{Q}", item['question'])
+ if "{D}" in prompt:
+ if ndoc == 0:
+ prompt = prompt.replace("{D}\n", "") # if there is no doc we also delete the empty line
+ else:
+ doc_list = get_shorter_text(item, item["docs"], ndoc, use_shorter) if use_shorter is not None else item["docs"][:ndoc]
+ text = "".join([make_doc_prompt(doc, doc_id, doc_prompt, use_shorter=use_shorter) for doc_id, doc in enumerate(doc_list)])
+ prompt = prompt.replace("{D}", text)
+
+ if not test:
+ answer = "\n" + "\n".join(item["answer"]) if isinstance(item["answer"], list) else item["answer"]
+ prompt = prompt.replace("{A}", "").rstrip() + answer
+ else:
+ prompt = prompt.replace("{A}", "").rstrip() # remove any space or \n
+
+ return prompt
+
+
+ def load_llama_guard(model_id = "meta-llama/Llama-Guard-3-1B"):
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ dtype = torch.bfloat16
+
+ logger.info("loading llama_guard")
+ llama_guard_tokenizer = AutoTokenizer.from_pretrained(model_id)
+ llama_guard = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, device_map="cuda")
+
+ # Get the id of the "unsafe" token, this will later be used to extract its probability
+ UNSAFE_TOKEN_ID = llama_guard_tokenizer.convert_tokens_to_ids("unsafe")
+
+ return llama_guard, llama_guard_tokenizer, UNSAFE_TOKEN_ID
+
+
+ def moderate(chat, model, tokenizer, UNSAFE_TOKEN_ID):
+
+ prompt = tokenizer.apply_chat_template(chat, return_tensors="pt", tokenize=False)
+ # Skip the generation of whitespace.
+ # Now the next predicted token will be either "safe" or "unsafe"
+ prompt += "\n\n"
+
+ inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
+ outputs = model.generate(
+ **inputs,
+ max_new_tokens=50,
+ return_dict_in_generate=True,
+ pad_token_id=tokenizer.eos_token_id,
+ output_logits=True, # get logits
+ )
+ ######
+ # Get generated text
+ ######
+
+ # Number of tokens that correspond to the input prompt
+ input_length = inputs.input_ids.shape[1]
+ # Ignore the tokens from the input to get the tokens generated by the model
+ generated_token_ids = outputs.sequences[:, input_length:].cpu()
+ generated_text = tokenizer.decode(generated_token_ids[0], skip_special_tokens=True)
+
+ ######
+ # Get Probability of "unsafe" token
+ ######
+
+ # First generated token is either "safe" or "unsafe".
+ # use the logits to calculate the probabilities.
+ first_token_logits = outputs.logits[0]
+ first_token_probs = torch.softmax(first_token_logits, dim=-1)
+
+ # From the probabilities of all tokens, extract the one for the "unsafe" token.
+ unsafe_probability = first_token_probs[0, UNSAFE_TOKEN_ID]
+ unsafe_probability = unsafe_probability.item()
+
+ ######
+ # Result
+ ######
+ return {
+ "unsafe_score": unsafe_probability,
+ "generated_text": generated_text
+ }
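A short usage sketch (not part of the file) for the two functions above: load the guard once, then screen each incoming query. Per the code, `generated_text` begins with "safe" or "unsafe", and `unsafe_score` is the probability mass on the "unsafe" token at the first generated position.

```python
from utils import load_llama_guard, moderate

# Assumes a CUDA device is available (load_llama_guard uses device_map="cuda").
guard, guard_tokenizer, unsafe_token_id = load_llama_guard("meta-llama/Llama-Guard-3-1B")

# Same message format app.py builds before calling moderate().
chat = [{"role": "user", "content": [{"type": "text", "text": "What is Mistral?"}]}]
verdict = moderate(chat, guard, guard_tokenizer, unsafe_token_id)

if verdict["generated_text"] == "safe":
    print("query allowed; P(unsafe) =", verdict["unsafe_score"])
```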
+
+
+
+ def get_max_memory():
+ """Get the maximum memory available for the current GPU for loading models."""
+ free_in_GB = int(torch.cuda.mem_get_info()[0]/1024**3)
+ max_memory = f'{free_in_GB-1}GB'
+ n_gpus = torch.cuda.device_count()
+ max_memory = {i: max_memory for i in range(n_gpus)}
+ return max_memory
+
+
+ def load_model(model_name_or_path, dtype=torch.bfloat16, int8=False):
+ # Load a huggingface model and tokenizer
+ # dtype: torch.float16 or torch.bfloat16
+ # int8: whether to use int8 quantization
+ # reserve_memory: how much memory to reserve for the model on each gpu (in GB)
+
+ # Load the FP16 model
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ logger.info(f"Loading {model_name_or_path} in {dtype}...")
+ if int8:
+ logger.warn("Use LLM.int8")
+ start_time = time.time()
+ model = AutoModelForCausalLM.from_pretrained(
+ model_name_or_path,
+ device_map='auto',
+ torch_dtype=dtype,
+ max_memory=get_max_memory(),
+ load_in_8bit=int8,
+ )
+ logger.info("Finish loading in %.2f sec." % (time.time() - start_time))
+
+ # Load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+ tokenizer.padding_side = "left"
+
+ return model, tokenizer
+
+
+ def load_vllm(model_name_or_path, dtype=torch.bfloat16):
+ from vllm import LLM, SamplingParams
+ logger.info(f"Loading {model_name_or_path} in {dtype}...")
+ start_time = time.time()
+ model = LLM(
+ model_name_or_path,
+ dtype=dtype,
+ gpu_memory_utilization=0.9,
+ max_seq_len_to_capture=2048,
+ max_model_len=8192,
+ )
+ sampling_params = SamplingParams(temperature=0.1, top_p=1.00, max_tokens=300)
+ logger.info("Finish loading in %.2f sec." % (time.time() - start_time))
+
+ # Load the tokenizer
+ tokenizer = model.get_tokenizer()
+
+ tokenizer.padding_side = "left"
+
+ return model, tokenizer, sampling_params
+
+
+
+ class LLM:
+
+ def __init__(self, model_name_or_path, use_vllm=True):
+ self.use_vllm = use_vllm
+ if use_vllm:
+ self.chat_llm, self.tokenizer, self.sampling_params = load_vllm(model_name_or_path)
+ else:
+ self.chat_llm, self.tokenizer = load_model(model_name_or_path)
+
+ self.prompt_exceed_max_length = 0
+ self.fewer_than_50 = 0
+
+ def generate(self, prompt, max_tokens=300, stop=None):
+ if max_tokens <= 0:
+ self.prompt_exceed_max_length += 1
+ logger.warning("Prompt exceeds max length and return an empty string as answer. If this happens too many times, it is suggested to make the prompt shorter")
+ return ""
+ if max_tokens < 50:
+ self.fewer_than_50 += 1
+ logger.warning("The model can at most generate < 50 tokens. If this happens too many times, it is suggested to make the prompt shorter")
+
+ if self.use_vllm:
+ inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], add_generation_prompt=True, tokenize=False)
+ self.sampling_params.n = 1 # Number of output sequences to return for the given prompt
+ self.sampling_params.stop_token_ids = [self.chat_llm.llm_engine.get_model_config().hf_config.eos_token_id]
+ self.sampling_params.max_tokens = max_tokens
+ output = self.chat_llm.generate(
+ inputs,
+ self.sampling_params,
+ use_tqdm=True,
+ )
+ generation = output[0].outputs[0].text.strip()
+
+ else:
+ inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], add_generation_prompt=True, return_dict=True, return_tensors="pt").to(self.chat_llm.device)
+ outputs = self.chat_llm.generate(
+ **inputs,
+ do_sample=True, temperature=0.1, top_p=1.0,
+ max_new_tokens=max_tokens,
+ num_return_sequences=1,
+ eos_token_id=[self.chat_llm.config.eos_token_id]
+ )
+ generation = self.tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True).strip()
+
+ return generation
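Finally, a minimal sketch (not part of the commit) of how `app.py` drives the `LLM` wrapper above: the model id is whatever the `MODEL_NAME` environment variable points to, and `use_vllm=False` selects the plain transformers path used in the Space.

```python
import os
from utils import LLM

# Hypothetical standalone use of the wrapper; app.py does the same inside choose_llm().
model = LLM(os.getenv("MODEL_NAME"), use_vllm=False)  # use_vllm=True would go through load_vllm instead
answer = model.generate("Question: What is Mistral?\n\nAnswer:", max_tokens=512)
print(answer)
```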