khalednabawi11 committed on
Commit 1e8aaad · verified · 1 Parent(s): fe9c668

Update app.py

Files changed (1): app.py +153 -51
app.py CHANGED
@@ -1,67 +1,169 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
- import os
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
-
- token = os.getenv("HF_TOKEN")
- client = InferenceClient("BioMistral/BioMistral-7B", token=token)
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
-
  """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
  if __name__ == "__main__":
-     demo.launch()
 
+ # import gradio as gr
+ # from huggingface_hub import InferenceClient
+ # import os
+
+ # """
+ # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+ # """
+
+ # token = os.getenv("HF_TOKEN")
+ # client = InferenceClient("BioMistral/BioMistral-7B", token=token)
+
+
+ # def respond(
+ #     message,
+ #     history: list[tuple[str, str]],
+ #     system_message,
+ #     max_tokens,
+ #     temperature,
+ #     top_p,
+ # ):
+ #     messages = [{"role": "system", "content": system_message}]
+
+ #     for val in history:
+ #         if val[0]:
+ #             messages.append({"role": "user", "content": val[0]})
+ #         if val[1]:
+ #             messages.append({"role": "assistant", "content": val[1]})
+
+ #     messages.append({"role": "user", "content": message})
+
+ #     response = ""
+
+ #     for message in client.chat_completion(
+ #         messages,
+ #         max_tokens=max_tokens,
+ #         stream=True,
+ #         temperature=temperature,
+ #         top_p=top_p,
+ #     ):
+ #         token = message.choices[0].delta.content
+
+ #         response += token
+ #         yield response
+
+
+ # """
+ # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ # """
+ # demo = gr.ChatInterface(
+ #     respond,
+ #     additional_inputs=[
+ #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+ #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+ #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+ #         gr.Slider(
+ #             minimum=0.1,
+ #             maximum=1.0,
+ #             value=0.95,
+ #             step=0.05,
+ #             label="Top-p (nucleus sampling)",
+ #         ),
+ #     ],
+ # )
+
+
+ # if __name__ == "__main__":
+ #     demo.launch()
+
+
  import gradio as gr
+ import torch
+ from langdetect import detect
+ from transformers import pipeline, GenerationConfig
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import VectorParams, Distance
+ from langchain.llms import HuggingFacePipeline
+ from langchain.chains import RetrievalQA
+ from langchain.vectorstores import Qdrant
+ from unsloth import FastLanguageModel  # FastLanguageModel comes from unsloth, not transformers
+ from langchain.embeddings import HuggingFaceEmbeddings
+
+ # Define model path
+ model_name = "FreedomIntelligence/Apollo-7B"
+
+ # Load the model with Unsloth (4-bit QLoRA)
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=2048,
+     dtype=torch.float16,
+     load_in_4bit=True
+ )
+
+ # Use the EOS token for padding, since the tokenizer has no padding token
+ tokenizer.pad_token = tokenizer.eos_token
+
+ # Set up the Qdrant vector store
+ qdrant_client = QdrantClient(url="https://your-qdrant-instance.com")
+ vector_size = 768
+ embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
+
+ qdrant_vectorstore = Qdrant(
+     client=qdrant_client,
+     collection_name="arabic_rag_collection",
+     embeddings=embedding
+ )
+
+ # Generation config
+ generation_config = GenerationConfig(
+     max_new_tokens=150,
+     temperature=0.2,
+     top_k=20,
+     do_sample=True,
+     top_p=0.7,
+     repetition_penalty=1.3,
+ )
+
+ # Set up the Hugging Face text-generation pipeline
+ llm_pipeline = pipeline(
+     model=model,
+     tokenizer=tokenizer,
+     task="text-generation",
+     generation_config=generation_config,
+ )
+
+ llm = HuggingFacePipeline(pipeline=llm_pipeline)
+
+ # Set up the QA chain ("stuff" puts all retrieved chunks into a single prompt)
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=llm,
+     retriever=qdrant_vectorstore.as_retriever(search_kwargs={"k": 3}),
+     chain_type="stuff"
+ )
+
+ # Build a prompt in the language of the question (Arabic or English)
+ def generate_prompt(question):
+     lang = detect(question)
+     if lang == "ar":
+         # Arabic prompt; in English it reads: "Answer the following medical
+         # question in Modern Standard Arabic with a precise, detailed answer.
+         # If the context lacks sufficient information, use your prior medical
+         # knowledge. Make sure to: not repeat any point, phrase, or word;
+         # keep every point clear and fluent; avoid filler and redundant phrases."
+         return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة.
+ وتأكد من أن:
+ - عدم تكرار أي نقطة أو عبارة أو كلمة
+ - وضوح وسلاسة كل نقطة
+ - تجنب الحشو والعبارات الزائدة
+
+ السؤال: {question}
+ الإجابة:
  """
+     else:
+         return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant information, rely on your prior medical knowledge. If the answer involves multiple points, list them in concise and distinct bullet points:
+ Question: {question}
+ Answer:"""
+
+ # Define the Gradio interface function
+ def medical_chatbot(question):
+     formatted_question = generate_prompt(question)
+     answer = qa_chain.run(formatted_question)
+     return answer
+
+ # Set up the Gradio interface
+ iface = gr.Interface(
+     fn=medical_chatbot,
+     inputs=gr.Textbox(label="Ask a Medical Question", placeholder="Type your question here..."),
+     outputs=gr.Textbox(label="Answer", interactive=False),
+     title="Medical Chatbot",
+     description="Ask medical questions and get detailed answers in Arabic or English.",
+     theme="compact"
+ )
+
+ # Launch the Gradio interface
  if __name__ == "__main__":
+     iface.launch()
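
A note on setup: the diff imports VectorParams and Distance but never uses them, and the "arabic_rag_collection" collection must already exist on the Qdrant instance before the Qdrant wrapper can retrieve from it. Below is a minimal sketch, not part of the commit, of one way to create and populate that collection, assuming the same 768-dimensional GATE-AraBert embeddings; the URL, distance metric, and document texts are placeholder assumptions.

# Sketch only; placeholder URL and corpus, cosine distance assumed.
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Qdrant

client = QdrantClient(url="https://your-qdrant-instance.com")
client.recreate_collection(
    collection_name="arabic_rag_collection",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),  # matches vector_size = 768
)

embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
store = Qdrant(client=client, collection_name="arabic_rag_collection", embeddings=embedding)
store.add_texts(["<medical document chunk 1>", "<medical document chunk 2>"])  # placeholder corpus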
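How a query flows through the new app: langdetect picks the template, and qa_chain.run receives the already formatted prompt, so the retriever embeds the instruction text together with the question before fetching the k=3 chunks for the "stuff" chain. A short usage sketch, again not part of the commit; the example question is hypothetical.

# Sketch only; assumes the app above has been loaded.
question = "ما هي أعراض فقر الدم؟"  # "What are the symptoms of anemia?"
prompt = generate_prompt(question)   # detect() returns "ar", so the Arabic template is used
answer = qa_chain.run(prompt)        # retrieve top-3 chunks, stuff them into one prompt, generate
print(answer)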