Andrew Gockenbach committed on
Commit
2217b4e
·
1 Parent(s): 578abe4

Convert to vllm api

Browse files
Files changed (2) hide show
  1. app.py +40 -775
  2. requirements.txt +2 -6
app.py CHANGED
@@ -1,794 +1,59 @@
1
  import os
2
  import json
3
- import random
4
- from threading import Thread
5
-
6
-
7
- import gradio as gr
8
- from gradio.themes import Base
9
- from gradio.themes.utils import colors
10
-
11
- from transformers import pipeline, TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
12
 
13
  SYSTEM_PROMPT = "You are a compliance assistant. Use the provided risk data to answer user questions. If a single risk object is given, provide a direct answer. If a list of risks is provided, summarize, compare, or analyze the collection as needed. Always base your response on the data provided."
14
  hf_token = os.environ["HF_TOKEN"]
15
 
16
- class HfModelWrapper:
17
  def __init__(
18
  self,
19
  model_path="casperhansen/llama-3.3-70b-instruct-awq",
20
- sys_prompt=SYSTEM_PROMPT,
21
  adapter_path="artemisiaai/fine-tuned-adapter",
 
 
22
  ):
23
-
24
- self.model = AutoModelForCausalLM.from_pretrained(
25
- model_path, device_map="auto"
26
- )
27
- self.tokenizer = AutoTokenizer.from_pretrained(model_path)
28
- self.sys_prompt = sys_prompt
29
  self.adapter_path = adapter_path
30
- self.model.load_adapter(self.adapter_path, token=hf_token)
31
- self.model.enable_adapters()
32
-
33
- def build_prompt(self, user_msg, history):
34
-
35
- inppt = []
36
- inppt.append({"role": "system", "content": self.sys_prompt})
37
- inppt += history
38
- inppt.append({"role": "user", "content": user_msg})
39
-
40
- prompt = self.tokenizer.apply_chat_template(
41
- inppt,
42
- tokenize=False,
43
- )
44
- return prompt
45
-
46
- def generate(self, user_input, history):
47
- input_text = self.build_prompt(user_input, history)
48
- input_ids = self.tokenizer.encode(input_text, return_tensors="pt").to("cuda")
49
-
50
- streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
51
-
52
- gen_kwargs = {
53
- "input_ids": input_ids,
54
- "streamer": streamer,
55
- "max_new_tokens": 2048,
56
- "min_length": 10,
57
- "temperature": 0.7,
58
- "top_p": 0.9,
59
- "top_k": 50,
60
- "repetition_penalty": 1.1,
61
- "do_sample": True,
62
- "eos_token_id": self.tokenizer.eos_token_id,
63
- "pad_token_id": self.tokenizer.pad_token_id,
64
- }
65
-
66
- thread = Thread(target=self.model.generate, kwargs=gen_kwargs)
67
- thread.start()
68
-
69
- return streamer
70
-
71
- # Custom theme colors based on brand standards
72
- class ArtemisiaTheme(Base):
73
- def __init__(self, **kwargs):
74
-
75
-
76
- # Configure Gradio's theme system with our colors
77
- super().__init__(
78
- font=["Segoe UI", "Tahoma", "Geneva", "Verdana", "sans-serif"],
79
- primary_hue=colors.indigo,
80
- neutral_hue=colors.gray,
81
- **kwargs
82
- )
83
- self.set(
84
- body_text_color="#ffffff",
85
- )
86
-
87
- # Background settings
88
- self.body_background_fill = "#4f008c"
89
- self.background_fill_primary = self.neutral_800
90
- self.block_background_fill = "transparent"
91
- self.block_label_background_fill = self.neutral_700
92
 
93
- # Components
94
- self.button_primary_background_fill = self.primary_900
95
- self.button_primary_background_fill_hover = self.primary_700
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
- # Input fields
98
- self.input_background_fill = "#000000"
99
- self.input_border_color = "transparent"
100
 
101
- # Text colors
102
- self.text_color = "#ffffff"
103
- self.error_text_color = "#ff5252"
104
-
105
-
106
- # Custom CSS for exact layout and styling
107
- custom_css = """
108
- body {
109
- margin: 0;
110
- padding: 0;
111
- width: 100%;
112
- background-color: #ffffff;
113
- color: #000;
114
- font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, BlinkMacSystemFont, sans-serif;
115
- font-weight: 400;
116
- line-height: 1.5;
117
- -webkit-font-smoothing: antialiased;
118
- -moz-osx-font-smoothing: grayscale;
119
- }
120
- .gradio-container {
121
- background-color: #ffffff !important;
122
- }
123
-
124
- .header {
125
- background-color: #ffffff;
126
- padding: 10px 20px;
127
- display: flex;
128
- flex-wrap: nowrap;
129
- align-items: flex-end;
130
- gap: 24px;
131
- }
132
-
133
- .logo-container {
134
- display: flex;
135
- gap: 0px;
136
- flex-direction: row;
137
- width: fit-content !important;
138
- flex: 0 0 auto !important;
139
- min-width: 0 !important;
140
- align-items: flex-end;
141
- }
142
-
143
- .nav-divider {
144
- width: 1px;
145
- height: 1rem;
146
- background-color: #4f008c;
147
- margin: 0 10px;
148
- align-self: flex-end;
149
- }
150
-
151
- .dashboard-title {
152
- font-size: 1rem;
153
- font-weight: 500;
154
- margin: 0;
155
- white-space: nowrap;
156
- flex: 1 1 0%;
157
- min-width: 0;
158
- overflow: hidden;
159
- text-overflow: ellipsis;
160
- letter-spacing: -0.02em;
161
- color: #4f008c !important;
162
- }
163
-
164
- .header h1 {
165
- color: #4f008c !important;
166
- }
167
-
168
- .stc-logo {
169
- display: flex;
170
- align-items: flex-end;
171
- padding: 0;
172
- margin: 0;
173
- }
174
-
175
- .stc-logo svg {
176
- height: 1rem;
177
- width: auto;
178
- display: block;
179
- overflow: hidden;
180
- scrollbar-width: none;
181
- overflow-x: hidden;
182
- fill: #4f008c !important;
183
- }
184
-
185
- .artemsia-logo {
186
- display: flex;
187
- align-items: flex-end;
188
- padding: 0;
189
- margin: 0;
190
- }
191
-
192
- .artemsia-logo svg {
193
- height: 1rem;
194
- width: auto;
195
- display: block;
196
- overflow: hidden;
197
- scrollbar-width: none;
198
- overflow-x: hidden;
199
- fill: #4f008c !important;
200
- }
201
-
202
- /* Make logos purple on white header */
203
- .stc-logo svg path {
204
- fill: #4f008c !important;
205
- }
206
-
207
- .artemsia-logo svg path {
208
- fill: #4f008c !important;
209
- }
210
-
211
- .about-section {
212
- padding: 12px 20px;
213
- background-color: #000;
214
- border-bottom: 1px solid #333;
215
- }
216
-
217
- .about-section h2 {
218
- color: #ff5252;
219
- margin-bottom: 6px;
220
- font-size: 1rem;
221
- font-weight: 600;
222
- }
223
-
224
- .about-section p {
225
- font-size: 0.875rem;
226
- line-height: 1.6;
227
- }
228
-
229
- .main-content {
230
- display: flex;
231
- padding: 0;
232
- background-color: #ffffff;
233
- gap: 8px;
234
- justify-content: space-between;
235
- }
236
-
237
- .chat-container, .results-container {
238
- flex: 1;
239
- background-color: transparent;
240
- border-radius: 15px;
241
- padding: 0;
242
- display: flex;
243
- flex-direction: column;
244
- min-height: 550px;
245
- max-height: 550px;
246
- height: 550px;
247
- overflow: hidden;
248
- margin: 0;
249
- gap: 0 !important;
250
- }
251
-
252
- .section-header {
253
- gap: 0;
254
- background-color: #120428;
255
- border-radius: 15px 15px 0 0;
256
- padding: 12px;
257
- margin-bottom: 0;
258
- }
259
-
260
- .section-header h2 {
261
- margin: 0;
262
- padding: 0;
263
- font-size: 1rem;
264
- font-weight: 600;
265
- letter-spacing: -0.02em;
266
- }
267
-
268
- .section-header p {
269
- margin: 0;
270
- padding: 4px 0 0 0;
271
- color: #ccc;
272
- font-size: 0.875rem;
273
- font-weight: 400;
274
- line-height: 1.4;
275
- }
276
-
277
- .chat-content {
278
- flex: 1;
279
- background-color: #0a0013;
280
- border-radius: 0 0 15px 15px;
281
- padding: 0;
282
- margin: 0;
283
- display: flex;
284
- flex-direction: column;
285
- justify-content: space-between;
286
- overflow: hidden;
287
- }
288
-
289
- .chat-messages {
290
- flex: 1;
291
- background: #120428 !important;
292
- padding: 10px !important;
293
- margin: 0 !important;
294
- max-width: 100%;
295
- border: none;
296
- overflow-y: auto;
297
- }
298
-
299
- .results-content {
300
- flex: 1;
301
- background-color: #0a0013;
302
- border-radius: 0 0 15px 15px;
303
- padding: 16px;
304
- margin: 0;
305
- display: flex;
306
- flex-direction: column;
307
- align-items: center;
308
- justify-content: center;
309
- text-align: center;
310
- }
311
-
312
- .placeholder-text {
313
- color: #aaa;
314
- font-size: 0.875rem;
315
- font-weight: 400;
316
- max-width: 400px;
317
- line-height: 1.6;
318
- }
319
-
320
- /* Updated styles to match the image reference */
321
- .gradio-container {
322
- max-width: 100% !important;
323
- padding: 0 !important;
324
- }
325
-
326
- .chat-container {
327
- padding: 0;
328
- margin: 0;
329
- }
330
-
331
- .results-container {
332
- padding: 0;
333
- margin: 0;
334
- min-width: min(50%, 100%) !important;
335
- }
336
-
337
- #component-0 > .gr-form > div:not(.about-section):not(.header) {
338
- background-color: #ffffff;
339
- }
340
-
341
- /* Chat messages container adjustments */
342
- .chat-messages {
343
- padding: 0 !important;
344
- margin: 0 !important;
345
- border: none !important;
346
- }
347
 
348
- /* Remove any gaps between elements, but exclude section headers */
349
- .chat-container > div:not(.section-header),
350
- .results-container > div:not(.section-header) {
351
- margin: 0 !important;
352
- padding: 0 !important;
353
- }
354
 
355
- /* Override flex-grow for section headers */
356
- #component-27,
357
- .column.section-header,
358
- div[id^="component"][class*="column section-header"] {
359
- flex-grow: 0 !important;
360
- }
361
-
362
- /* Add more forceful display for chatbot component */
363
- .gradio-chatbot {
364
- height: auto !important;
365
- min-height: 400px !important;
366
- display: block !important;
367
- width: 100% !important;
368
- overflow-y: auto !important;
369
- visibility: visible !important;
370
- opacity: 1 !important;
371
- background-color: #0a0013 !important;
372
- background: #0a0013 !important;
373
- }
374
-
375
- /* Also target the container of the chatbot */
376
- .message-wrap {
377
- display: block !important;
378
- visibility: visible !important;
379
- opacity: 1 !important;
380
- background-color: #0a0013 !important;
381
- background: #0a0013 !important;
382
- }
383
-
384
- /* Target the messages themselves */
385
- .message {
386
- display: block !important;
387
- visibility: visible !important;
388
- opacity: 1 !important;
389
- background-color: #0a0013 !important;
390
- }
391
-
392
- /* Ensure chat bubbles have proper max-width */
393
- .bubble-wrap {
394
- background: #0A0013 !important;
395
- background-color: #0A0013 !important;
396
- margin: 0 !important;
397
- padding: 0 !important;
398
- min-width: 100%;
399
- border: none !important;
400
- }
401
-
402
- /* Fix scrollable area background */
403
- .gradio-chatbot .messages-wrapper {
404
- background-color: #0a0013 !important;
405
- background: #0a0013 !important;
406
- }
407
-
408
- /* Ensure the entire chat area has consistent background */
409
- .gradio-chatbot .wrapper,
410
- .gradio-chatbot .container,
411
- .gradio-chatbot .messages {
412
- background-color: #0a0013 !important;
413
- background: #0a0013 !important;
414
- }
415
-
416
- /* Target the chat bubble containers */
417
- .gradio-chatbot .messages-wrapper > div > div {
418
- max-width: 70% !important; /* Allow bubbles to be a bit wider */
419
- width: auto !important; /* Let content determine width, up to max-width */
420
- min-width: 30% !important; /* Ensure a minimum width */
421
- }
422
-
423
- /* Style the individual message bubbles */
424
- .gradio-chatbot .messages-wrapper > div > div > div {
425
- width: 100% !important; /* Text fills the bubble */
426
- padding: 10px 15px !important; /* Add more padding for better readability */
427
- border-radius: 12px !important; /* Rounded corners */
428
- }
429
-
430
- /* Focus on the text content inside bubbles */
431
- .gradio-chatbot .bubble {
432
- width: 100% !important; /* Fill the bubble width */
433
- max-width: 100% !important;
434
- word-wrap: break-word !important;
435
- overflow-wrap: break-word !important;
436
- display: block !important; /* Ensure it takes full width */
437
- white-space: normal !important; /* Allow proper wrapping */
438
- }
439
-
440
- /* Target the actual text paragraphs inside bubbles */
441
- .gradio-chatbot .bubble p,
442
- .gradio-chatbot .bubble span,
443
- .gradio-chatbot .bubble div {
444
- width: 100% !important;
445
- display: inline-block !important;
446
- word-break: break-word !important;
447
- white-space: normal !important;
448
- text-align: left !important;
449
- }
450
-
451
- /* Target the first lines of text specifically to ensure they fill the width */
452
- .gradio-chatbot .bubble p:first-child,
453
- .gradio-chatbot .bubble span:first-child {
454
- width: 100% !important;
455
- display: block !important;
456
- }
457
-
458
- /* Specific gradio selector */
459
- .chat-messages [data-testid="chatbot"] .message-wrap > div {
460
- max-width: 50% !important;
461
- }
462
-
463
- .chat-input-container {
464
- width: 100%;
465
- position: relative;
466
- border-top: 1px solid rgba(255, 255, 255, 0.1);
467
- padding: 16px;
468
- background-color: #000000;
469
- display: flex;
470
- gap: 0 !important;
471
- }
472
-
473
- .chat-input {
474
- flex-grow: 1;
475
- padding: 0px;
476
- background-color: #000000;
477
- border: none;
478
- border-radius: 8px 0 0 8px;
479
- color: #fff;
480
- font-size: 0.9rem;
481
- font-weight: 400;
482
- line-height: 1.5;
483
- width: 100%;
484
- margin: 0 !important;
485
- }
486
-
487
- .send-button {
488
- width: 50px !important;
489
- min-width: 40px !important;
490
- max-width: 50px !important;
491
- height: 100%;
492
- background-color: #993333;
493
- border: none !important;
494
- border-radius: 0 8px 8px 0;
495
- color: white;
496
- display: flex;
497
- align-items: center;
498
- justify-content: center;
499
- cursor: pointer;
500
- transition: background-color 0.2s ease;
501
- margin: 0 !important;
502
- }
503
-
504
- .send-button:hover {
505
- background-color: #b33c3c;
506
- }
507
-
508
- .container.show_textbox_border {
509
- border: none !important;
510
- background: transparent !important;
511
- box-shadow: none !important;
512
- margin: 0 !important;
513
- }
514
-
515
- .input-container {
516
- background: transparent !important;
517
- border: none !important;
518
- margin: 0 !important;
519
- }
520
-
521
- .input-container textarea {
522
- background: transparent !important;
523
- color: #fff !important;
524
- border: none !important;
525
- box-shadow: none !important;
526
- border-radius: 0 !important;
527
- min-width: min(80%, 100%) !important;
528
- }
529
-
530
- span[data-testid="block-info"] {
531
- display: none !important;
532
- }
533
-
534
- .chat-input::placeholder {
535
- color: #aaa;
536
- background-color: #000000;
537
- padding: 0px;
538
- }
539
-
540
- /* Paper plane icon styling for the send button */
541
- .send-icon {
542
- width: 20px;
543
- height: 20px;
544
- }
545
-
546
- .send-button svg {
547
- width: 20px;
548
- height: 20px;
549
- display: block;
550
- margin: auto;
551
- }
552
-
553
- .bot, .user {
554
- max-width: 70% !important;
555
- width: fit-content !important;
556
- }
557
-
558
- /* Enhanced typography for chat messages */
559
- .gradio-chatbot .message {
560
- font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
561
- font-size: 0.9rem !important;
562
- font-weight: 400 !important;
563
- line-height: 1.5 !important;
564
- -webkit-font-smoothing: antialiased !important;
565
- }
566
-
567
- /* Improved text styling for all components */
568
- .gradio-container * {
569
- font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
570
- }
571
-
572
- /* Enhanced headings */
573
- h1, h2, h3, h4, h5, h6 {
574
- font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
575
- font-weight: 600 !important;
576
- letter-spacing: -0.02em !important;
577
- line-height: 1.3 !important;
578
- }
579
-
580
- /* Button text improvements */
581
- button {
582
- font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
583
- font-weight: 500 !important;
584
- letter-spacing: -0.01em !important;
585
- }
586
-
587
- /* Input field improvements */
588
- input, textarea {
589
- font-family: 'Helvetica Neue', Helvetica, Arial, system-ui, -apple-system, BlinkMacSystemFont, sans-serif !important;
590
- font-weight: 400 !important;
591
- line-height: 1.5 !important;
592
- }
593
- """
594
-
595
- # SVG icons
596
- stc_logo_svg = """<svg xmlns="http://www.w3.org/2000/svg" width="66" height="32" viewBox="0 0 66 32" fill="#4f008c" style="display: block; flex-shrink: 0;"><path fill-rule="evenodd" clip-rule="evenodd" d="M31.625 6.80851L31.6213 0.00567279L24.7511 0L24.75 6.79666L31.625 6.80851ZM5.88232 23.0513L0 24.2653C0.424529 29.3706 6.18066 32.1214 11.3441 31.9959C14.2329 31.9257 16.8607 30.9901 18.5472 29.6392C22.4356 26.5245 22.4103 20.0758 17.2744 17.3711C15.3452 16.3551 13.2453 15.9901 11.4475 15.6777C9.10574 15.2707 7.27666 14.9528 7.00534 13.4013C6.4686 10.332 12.5096 9.46738 14.5772 12.2286C14.8637 12.6112 14.954 12.903 15.0505 13.2152L15.0505 13.2152C15.114 13.4203 15.1801 13.6342 15.3064 13.8884L20.889 12.6335C20.9094 12.6247 20.9455 12.6077 20.9725 12.5925C20.6349 7.68784 14.5151 5.00625 9.32399 5.5062C-0.27525 6.43076 -2.55039 16.9042 5.58028 20.2095C6.79516 20.7033 8.15129 20.9902 9.48385 21.2722C10.7248 21.5347 11.9453 21.793 13.0123 22.21C15.9184 23.3457 15.0929 26.4741 11.6771 26.7789C8.56398 27.0567 6.23313 25.7203 5.88232 23.0513ZM66 23.5042L59.9776 21.5819C59.8074 21.8761 59.6967 22.1399 59.5911 22.3913C59.4397 22.7522 59.2989 23.0876 59.0079 23.451C58.5671 24.0016 58.1301 24.4609 57.4746 24.8733C56.2011 25.6745 54.3913 26.0052 52.5943 25.6296C49.5135 24.9858 47.5738 22.504 47.4893 18.9828C47.4011 15.3106 49.2053 12.6962 52.1919 11.9434C56.004 10.9825 58.8882 12.9476 59.6362 15.8925L65.8508 13.9081C65.2461 10.9173 62.8376 8.44026 60.8112 7.24677C52.9767 2.63234 40.5366 7.03787 40.5625 18.7666C40.5881 30.3289 53.233 34.8158 60.8587 30.2249C62.1039 29.4752 63.2247 28.4579 64.0145 27.4692C64.723 26.5823 65.7798 24.9372 66 23.5042ZM24.8097 12.9397L31.6484 12.9362C31.6463 13.2625 31.6421 13.7315 31.6371 14.2928L31.637 14.3022L31.637 14.3033L31.637 14.3036L31.637 14.304C31.6122 17.0787 31.5674 22.0894 31.6626 23.2768C31.7957 24.9371 32.571 25.7899 34.2386 26.0043C35.9984 26.2305 36.9321 25.8072 38.115 25.2708L38.1151 25.2708C38.2402 25.2141 38.3681 25.1561 38.5 
25.0975L38.4566 30.5261C35.605 32.6136 28.5618 32.8937 25.8963 28.6417C24.7526 26.8176 24.7717 24.551 24.7916 22.1906L24.7916 22.19C24.7942 21.8817 24.7968 21.5719 24.7968 21.2612C24.7969 20.3989 24.7853 19.5126 24.7737 18.6184V18.6183V18.6183V18.6182V18.6182C24.7489 16.7095 24.7236 14.7647 24.8097 12.9397ZM38.5 12.9362L31.625 12.935L31.6257 6.12766L38.4997 6.13078L38.5 12.9362Z" fill="#ffffff"></path></svg>"""
597
- artemsia_logo_svg = """
598
- <?xml version="1.0" encoding="UTF-8"?>
599
- <svg id="Layer_2" data-name="Layer 2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 351.26 430.29">
600
- <g id="Layer_1-2" data-name="Layer 1">
601
- <path d="M24.35,126.29c-5.54,0-10.14-1.51-13.79-4.52-3.64-3.03-6.31-7.15-8.02-12.38-1.7-5.22-2.54-11.1-2.54-17.65,0-5.92.78-12.11,2.35-18.58,1.57-6.48,3.92-12.78,7.06-18.88,3.15-6.11,6.99-11.65,11.52-16.63,4.54-4.97,9.76-8.94,15.67-11.9,5.92-2.96,12.46-4.44,19.65-4.44,5.15,0,9.23.73,12.25,2.19,3.03,1.45,5.73,3.3,8.13,5.56l10.02-5.67h6.02l-12.83,60.4c-.25,1.39-.63,3.41-1.13,6.04-.5,2.64-.91,5.35-1.23,8.13-.32,2.77-.48,5.03-.48,6.79,0,10.07,2.58,15.1,7.75,15.1,2.01,0,4.21-.57,6.6-1.71,2.39-1.14,4.96-2.71,7.73-4.71l1.15,1.5c-1.77,1.64-4.16,3.34-7.19,5.1-3.02,1.75-6.22,3.22-9.63,4.42-3.39,1.21-6.66,1.81-9.81,1.81-4.53,0-8.11-1.39-10.75-4.15-2.64-2.78-3.96-6.61-3.96-11.52,0-2.53.13-4.82.38-6.9.25-2.08.63-4.69,1.13-7.83h-.19c-4.41,9.7-9.6,17.19-15.58,22.48-5.97,5.28-12.73,7.92-20.27,7.92ZM32.46,121.19c3.4,0,6.67-1.26,9.81-3.77,3.15-2.51,6.07-5.69,8.77-9.52,2.71-3.84,4.97-7.81,6.79-11.9,1.83-4.09,3.07-7.65,3.71-10.67l11.31-55.13c-1.39-2.26-3.41-3.96-6.04-5.08-2.64-1.14-5.41-1.71-8.29-1.71-5.17,0-9.8,1.58-13.9,4.73-4.08,3.14-7.73,7.26-10.94,12.35-3.21,5.1-5.89,10.73-8.02,16.9-2.14,6.17-3.74,12.3-4.81,18.4-1.07,6.1-1.6,11.67-1.6,16.71,0,10.32,1.26,17.68,3.77,22.08,2.51,4.41,5.66,6.6,9.44,6.6Z"/>
602
- <path d="M113.81,124.4l17.19-81.92c.38-1.76.65-3.46.83-5.08.19-1.64.29-2.78.29-3.42,0-3.78-1.6-6.2-4.81-7.27-3.21-1.07-6.64-1.6-10.29-1.6h-1.5l.38-1.88h34.54l-4.33,27.17h.38c2.39-5.15,5.03-9.93,7.92-14.33,2.9-4.4,6.3-7.96,10.19-10.67,3.9-2.71,8.81-4.06,14.73-4.06,5.29,0,9.19,1.23,11.71,3.69,2.51,2.46,3.77,5.57,3.77,9.33,0,4.41-1.29,7.9-3.88,10.48-2.57,2.58-6.63,3.88-12.17,3.88.13-1.01.19-1.98.19-2.92v-2.75c0-4.65-.66-8.36-1.98-11.13-1.32-2.78-3.81-4.17-7.46-4.17-2.64,0-5.29,1.17-7.94,3.5-2.64,2.33-5.13,5.27-7.46,8.79-2.32,3.52-4.27,7.13-5.83,10.83-1.57,3.71-2.68,6.95-3.31,9.73l-13.58,63.79h-17.56Z"/>
603
- <path d="M228.01,126.29c-13.72,0-20.58-7.36-20.58-22.08,0-2.26.19-4.88.56-7.83.39-2.96.83-5.63,1.33-8.02l13.4-63.25h-14.33l.38-1.88c6.92,0,12.51-.76,16.79-2.27,4.28-1.51,7.8-3.96,10.56-7.35,2.78-3.4,5.36-7.94,7.75-13.6h1.9l-4.92,23.23h22.83l-.38,1.88h-22.83l-13.4,62.67c-.64,3.16-1.18,6.15-1.63,8.98-.43,2.83-.65,5.26-.65,7.27,0,5.66,1.13,9.71,3.4,12.17,2.26,2.46,5.47,3.69,9.63,3.69,2.39,0,4.94-.41,7.65-1.23,2.71-.82,5.07-1.86,7.08-3.13l.94,1.71c-3.78,2.64-7.81,4.81-12.08,6.52-4.28,1.69-8.74,2.54-13.4,2.54Z"/>
604
- <path d="M306.72,126.29c-6.67,0-12.81-1.45-18.42-4.33-5.6-2.9-10.06-7.21-13.4-12.94-3.33-5.73-5-13.01-5-21.81,0-6.92,1.19-14.21,3.58-21.88,2.4-7.68,5.99-14.85,10.77-21.52,4.78-6.68,10.66-12.09,17.65-16.25,6.98-4.15,15.07-6.23,24.25-6.23,7.93,0,14.09,1.93,18.5,5.77,4.4,3.83,6.6,9.15,6.6,15.94,0,6.04-2.36,11.68-7.08,16.9-4.71,5.22-11.16,9.47-19.33,12.75-8.18,3.27-17.43,4.9-27.75,4.9h-6.6c-.63,3.78-.94,7.49-.94,11.15,0,11.32,1.6,20.1,4.81,26.33,3.21,6.22,7.9,9.33,14.06,9.33,7.17,0,12.89-2.2,17.17-6.6,4.28-4.4,7.55-10.06,9.81-16.98,2.64.75,3.96,2.64,3.96,5.65,0,2.39-1.17,5.16-3.5,8.31-2.32,3.14-5.87,5.85-10.65,8.13-4.78,2.26-10.95,3.4-18.5,3.4ZM290.86,75.71h5.1c11.07,0,19.81-3.08,26.23-9.25s9.63-14.41,9.63-24.73c0-12.21-3.4-18.31-10.19-18.31-3.78,0-7.31,1.54-10.58,4.63-3.27,3.08-6.22,7.21-8.88,12.38-2.64,5.16-4.91,10.81-6.79,16.98-1.89,6.17-3.4,12.27-4.52,18.31Z"/>
605
- <path d="M159.7,278.29c-5.54,0-9.79-1.77-12.75-5.29-2.96-3.53-4.44-8.05-4.44-13.58s1.19-14.28,3.58-25.48l3.21-14.73c.5-2.51,1.28-6.13,2.35-10.85,1.07-4.72,1.6-9.09,1.6-13.13,0-2.01-.16-4.21-.48-6.6-.31-2.39-1.28-4.46-2.92-6.23-1.63-1.76-4.33-2.65-8.1-2.65-3.28,0-6.4,1.01-9.35,3.02-2.96,2.02-5.67,4.44-8.13,7.27-2.45,2.83-4.52,5.6-6.23,8.31-1.7,2.7-2.92,4.74-3.67,6.13l-7.35,33.04c-.38,1.75-.89,4.45-1.52,8.1-.63,3.66-1.22,7.59-1.79,11.81-.57,4.21-1.08,8.08-1.52,11.6-.43,3.52-.65,5.97-.65,7.35h-17.94c0-1.51.22-4,.67-7.46.44-3.46.98-7.35,1.6-11.69.63-4.34,1.31-8.59,2.06-12.75.76-4.15,1.46-7.67,2.08-10.56l3.21-14.73c.64-2.51,1.46-6.13,2.46-10.85,1.01-4.72,1.52-9.09,1.52-13.13,0-2.51-.32-4.96-.96-7.35-.63-2.39-1.79-4.34-3.5-5.85-1.7-1.51-4.24-2.27-7.63-2.27s-6.68,1.17-9.83,3.5c-3.14,2.33-6.03,5.14-8.67,8.42-2.64,3.27-4.78,6.41-6.42,9.44-1.64,3.02-2.58,5.21-2.83,6.58l-14.73,68.71h-17.54l17.17-81.92c.38-1.76.66-3.46.85-5.08.19-1.64.29-2.78.29-3.42,0-3.78-1.29-6.2-3.88-7.27-2.58-1.07-5.7-1.6-9.35-1.6h-1.5l.38-1.88h34.92l-5.1,23.77h.4c5.28-8.3,10.71-14.63,16.31-18.96,5.59-4.34,12.11-6.52,19.54-6.52s12.65,2.11,15.67,6.33c3.01,4.21,4.52,9.08,4.52,14.63,0,.75-.04,1.51-.1,2.27-.06.75-.15,1.5-.27,2.25h.38c5.29-8.3,10.76-14.63,16.42-18.96,5.67-4.34,12.15-6.52,19.46-6.52s12.63,2.11,15.65,6.33c3.03,4.21,4.54,9.08,4.54,14.63,0,3.14-.41,6.57-1.23,10.29-.82,3.71-1.54,7.2-2.17,10.48l-4.9,22.27c-.77,3.52-1.37,6.97-1.81,10.38-.43,3.41-.65,6.55-.65,9.44,0,4.41.72,7.8,2.17,10.19,1.44,2.39,3.68,3.58,6.71,3.58,3.9,0,7.99-1.69,12.27-5.08l.94,1.5c-3.03,2.52-6.4,4.94-10.1,7.27s-8.02,3.5-12.92,3.5Z"/>
606
- <path d="M227.45,156.35c-2.52,0-4.59-.72-6.23-2.17-1.64-1.46-2.46-3.88-2.46-7.27,0-4.15.88-7.23,2.65-9.25,1.76-2.01,4.03-3.02,6.79-3.02,2.53,0,4.6.66,6.23,1.98,1.64,1.32,2.46,3.81,2.46,7.46,0,4.78-1.01,8.02-3.02,9.73-2.02,1.7-4.16,2.54-6.42,2.54ZM214.42,278.29c-5.03,0-8.9-1.83-11.6-5.48-2.71-3.65-4.06-8.31-4.06-13.98,0-2.64.25-5.69.75-9.15.51-3.46,1.08-6.85,1.71-10.19s1.19-6.26,1.69-8.79l8.69-40.75c.38-1.89.56-3.53.56-4.92,0-3.4-1.04-5.57-3.13-6.52-2.07-.94-4.86-1.42-8.38-1.42h-2.46l.38-1.88h33.79l-12.83,60.58c-.25,1.39-.64,3.41-1.15,6.04-.5,2.64-.91,5.35-1.23,8.13-.31,2.77-.46,5.03-.46,6.79,0,9.18,2.7,13.77,8.1,13.77,2.78,0,5.23-.47,7.38-1.42,2.14-.94,4.34-2.17,6.6-3.67l1.13,1.5c-2.77,2.52-6.54,5.04-11.33,7.56-4.78,2.51-9.49,3.77-14.15,3.77Z"/>
607
- <path d="M278.78,278.29c-9.43,0-16.98-1.89-22.65-5.67-5.67-3.78-8.5-9-8.5-15.67,0-3.4.78-5.98,2.35-7.73,1.58-1.76,3.38-2.99,5.4-3.69,2.01-.69,3.78-1.04,5.29-1.04-.13,1-.22,2.04-.29,3.1-.06,1.07-.08,2.05-.08,2.94,0,7.17,1.51,13.27,4.52,18.31,3.01,5.03,7.92,7.54,14.73,7.54s11.98-1.95,15.94-5.85c3.97-3.9,5.96-9.25,5.96-16.04,0-5.79-1.45-10.38-4.35-13.77-2.89-3.4-7.79-6.67-14.71-9.81-6.93-3.28-12.47-7.08-16.63-11.42-4.16-4.34-6.23-10.3-6.23-17.85,0-8.05,3.27-14.76,9.81-20.1,6.55-5.34,15.43-8.02,26.63-8.02,6.92,0,12.51.89,16.79,2.65,4.28,1.77,7.45,4.04,9.52,6.81,2.08,2.77,3.13,5.59,3.13,8.48,0,4.03-1.35,6.99-4.06,8.88-2.7,1.89-6.06,2.83-10.08,2.83.13-1.01.21-1.96.27-2.83.07-.89.1-1.83.1-2.83,0-6.17-1.29-11.39-3.88-15.67-2.58-4.28-6.96-6.42-13.13-6.42-7.04,0-11.92,2.05-14.63,6.15-2.71,4.08-4.06,8.64-4.06,13.67,0,5.79,1.54,10.27,4.63,13.42,3.08,3.14,7.96,6.22,14.65,9.23,6.92,3.16,12.42,6.99,16.5,11.52,4.09,4.53,6.15,10.45,6.15,17.75,0,9.18-3.49,16.67-10.48,22.46-6.99,5.79-16.52,8.69-28.6,8.69Z"/>
608
- <path d="M120.34,309.1c-2.41,0-4.39-.78-5.96-2.35-1.57-1.57-2.35-4.3-2.35-8.21s.78-6.8,2.35-8.31c1.57-1.51,3.55-2.27,5.96-2.27,2.26,0,4.21.76,5.83,2.27,1.64,1.52,2.46,4.29,2.46,8.31s-.82,6.64-2.46,8.21c-1.63,1.57-3.57,2.35-5.83,2.35ZM94.46,428.4v-1.9h2.46c5.54,0,9.51-1.13,11.9-3.4,2.39-2.26,3.58-5.97,3.58-11.13v-68.15c0-5.28-1.2-9.05-3.58-11.31-2.39-2.28-6.35-3.42-11.9-3.42h-.56v-1.88h33.79v83.79c0,5.54,1.19,9.51,3.58,11.9,2.39,2.39,6.35,3.58,11.9,3.58h2.44v1.9h-53.6Z"/>
609
- <path d="M191.11,430.29c-9.7,0-17.31-2.48-22.85-7.46-5.53-4.97-8.29-11.8-8.29-20.48,0-9.69,3.55-16.93,10.67-21.71,7.11-4.78,17.9-7.42,32.38-7.94l15.67-.56v-14.15c0-7.05-.39-12.84-1.15-17.38-.75-4.53-2.39-7.89-4.9-10.08-2.52-2.21-6.35-3.31-11.52-3.31-7.29,0-11.98,2.27-14.06,6.79-2.07,4.53-3.1,11.64-3.1,21.33-5.29,0-9.29-.88-12-2.65-2.7-1.76-4.04-4.66-4.04-8.69,0-4.53,1.53-8.15,4.6-10.85,3.08-2.71,7.17-4.69,12.27-5.94,5.09-1.26,10.79-1.9,17.08-1.9,11.58,0,20.23,2.46,25.96,7.38,5.72,4.91,8.58,13.33,8.58,25.29v56.44c0,4.53.88,7.68,2.65,9.44,1.76,1.75,4.78,2.63,9.06,2.63h.56v1.9h-28.5v-19.81h-.38c-2.02,7.54-5.57,13.05-10.67,16.52-5.1,3.46-11.1,5.19-18.02,5.19ZM194.88,427.65c4.65,0,8.77-1.19,12.35-3.58,3.59-2.4,6.4-5.61,8.42-9.63,2.01-4.03,3.02-8.38,3.02-13.04v-27.17l-12.08.56c-10.7.5-18.09,2.95-22.19,7.35-4.08,4.41-6.13,11.83-6.13,22.27,0,15.49,5.53,23.23,16.6,23.23Z"/>
610
- </g>
611
- </svg>
612
- """
613
-
614
-
615
- # Paper plane SVG icon for send button
616
- paper_plane_svg = """<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
617
- <path d="M22 2L11 13"/>
618
- <path d="M22 2L15 22L11 13L2 9L22 2Z"/>
619
- </svg>"""
620
-
621
- wrapper = HfModelWrapper()
622
- with open("./question_mapping.json", "r") as f:
623
- database = json.load(f)
624
- database = {k.lower(): v for k, v in database.items()}
625
-
626
-
627
- # Mock data function for chatbot
628
- """def send_message(message, history):
629
- #if not message:
630
- # return history
631
- #history.append({"role": "assistant", "content": f"This is a response about: {message}"})
632
- #return history
633
-
634
- history.append({"role": "user", "content": message})
635
- # Recover extra data
636
- if message.lower() in database:
637
- context = random.choice(database[message.lower()])
638
- message += " \n"+context
639
- response_generator = wrapper.generate(message, history)
640
- partial = ""
641
- idx = 0
642
- for t in response_generator:
643
- if idx <= 3:
644
- idx += 1
645
- continue
646
- else:
647
- partial += t
648
- yield history + [{"role": "assistant", "content": partial}]"""
649
-
650
- def send_message(message, history):
651
- history.append({"role": "user", "content": message})
652
- context = ""
653
- # Recupera contesto extra se esiste
654
- if message.lower() in database:
655
- context = random.choice(database[message.lower()])
656
- message += " \n" + context
657
-
658
- response_generator = wrapper.generate(message, history)
659
- partial = ""
660
- idx = 0
661
- cleared = False
662
-
663
- if len(context) > 1000:
664
- context = context[:1000] + "[...]"
665
-
666
- for t in response_generator:
667
- if idx <= 3:
668
- idx += 1
669
- continue
670
- else:
671
- partial += t
672
- # Quando arriva il primo token utile, svuota la textbox
673
- if not cleared:
674
- cleared = True
675
- yield gr.update(value=""), history + [{"role": "assistant", "content": partial}], gr.update(value=context)
676
- else:
677
- yield "", history + [{"role": "assistant", "content": partial}], gr.update(value=context)
678
-
679
-
680
- # Create the dashboard
681
- with gr.Blocks(theme=ArtemisiaTheme(), css=custom_css) as demo:
682
- # Header
683
- with gr.Row(elem_classes="header"):
684
- with gr.Row(elem_classes="logo-container"):
685
- gr.HTML(stc_logo_svg, elem_classes="stc-logo")
686
- # gr.HTML('<div class="nav-divider"></div>', elem_classes="divider")
687
- # gr.HTML(artemsia_logo_svg, elem_classes="stc-logo")
688
- gr.Markdown("# Risk Insights Dashboard", elem_classes="dashboard-title")
689
-
690
-
691
-
692
- # Main content area
693
- with gr.Row(elem_classes="main-content"):
694
- # Chat column
695
- with gr.Column(scale=1, elem_classes="chat-container"):
696
- with gr.Column(elem_classes="section-header"):
697
- gr.Markdown("## Chat Interface")
698
- gr.Markdown("Query your Archer risk data here or ask questions for valuable insights.")
699
- with gr.Column(elem_classes="chat-content"):
700
- chatbot = gr.Chatbot(value=[
701
- {"role": "assistant", "content": "Welcome to Risk Insights. How can I help you analyze your risk data today?"}
702
- ],
703
- type='messages',
704
- elem_classes="chat-messages",
705
- layout="bubble")
706
- with gr.Row(elem_classes="chat-input-container"):
707
- chat_input = gr.Textbox(placeholder="Ask about stc Group's risk data", elem_classes="chat-input", label="", scale=1)
708
- send_button = gr.Button(value="→", elem_classes="send-button", scale=0)
709
-
710
- # Results column
711
- with gr.Column(scale=1, elem_classes="results-container"):
712
- with gr.Column(elem_classes="section-header"):
713
- gr.Markdown("## Results & Insights")
714
- gr.Markdown("Visualizations and data from your queries")
715
- with gr.Column(elem_classes="results-content"):
716
- results_placeholder = gr.Markdown("", elem_classes="placeholder-text", render=True)
717
-
718
- # Wire up the chat functionality
719
- #send_button.click(
720
- # fn=send_message,
721
- # inputs=[chat_input, chatbot],
722
- # outputs=[chatbot]
723
- #).then(lambda: "", None, chat_input)
724
-
725
- #chat_input.submit(
726
- # fn=send_message,
727
- # inputs=[chat_input, chatbot],
728
- # outputs=[chatbot]
729
- #).then(lambda: "", None, chat_input)
730
- """
731
- send_button.click(
732
- lambda msg: (gr.update(value=""), msg), # clears textbox immediately
733
- inputs=[chat_input],
734
- outputs=[chat_input, gr.State()], # dummy state to capture message
735
- ).then(
736
- fn=send_message,
737
- inputs=[chat_input, chatbot],
738
- outputs=[chatbot]
739
- )
740
-
741
- chat_input.submit(
742
- lambda msg: (gr.update(value=""), msg),
743
- inputs=[chat_input],
744
- outputs=[chat_input, gr.State()],
745
- ).then(
746
- fn=send_message,
747
- inputs=[chat_input, chatbot],
748
- outputs=[chatbot]
749
- )
750
- """
751
- send_button.click(
752
- fn=send_message,
753
- inputs=[chat_input, chatbot],
754
- outputs=[chat_input, chatbot, results_placeholder]
755
- )
756
-
757
- chat_input.submit(
758
- fn=send_message,
759
- inputs=[chat_input, chatbot],
760
- outputs=[chat_input, chatbot, results_placeholder]
761
- )
762
- # JavaScript for UI enhancements
763
- gr.HTML("""
764
- <script>
765
- // Replace the send button text with SVG icon
766
- document.addEventListener('DOMContentLoaded', function() {
767
- function updateSendButton() {
768
- const sendButtons = document.querySelectorAll('.send-button');
769
- if (sendButtons && sendButtons.length > 0) {
770
- sendButtons.forEach(button => {
771
- button.innerHTML = `<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M22 2L11 13"></path><path d="M22 2L15 22L11 13L2 9L22 2Z"></path></svg>`;
772
- });
773
- return true;
774
- }
775
- return false;
776
- }
777
-
778
- // Try immediately and then retry a few times to handle Gradio's dynamic loading
779
- let attempts = 0;
780
- const maxAttempts = 10;
781
- const checkInterval = setInterval(function() {
782
- if (updateSendButton() || attempts >= maxAttempts) {
783
- clearInterval(checkInterval);
784
- }
785
- attempts++;
786
- }, 500);
787
- });
788
- </script>
789
- """)
790
 
791
- # Launch the app
792
- #demo.launch(share=True)
793
  if __name__ == "__main__":
794
- demo.launch()
 
 
1
  import os
2
  import json
3
+ import subprocess
4
+ import time
5
+ import signal
6
+ import atexit
 
 
 
 
 
7
 
8
  SYSTEM_PROMPT = "You are a compliance assistant. Use the provided risk data to answer user questions. If a single risk object is given, provide a direct answer. If a list of risks is provided, summarize, compare, or analyze the collection as needed. Always base your response on the data provided."
9
  hf_token = os.environ["HF_TOKEN"]
10
 
11
class VllmApiServer:
    """Launch vLLM's OpenAI-compatible API server with one LoRA adapter.

    The launcher does not supervise a child process: ``run()`` replaces the
    current Python process with the vLLM server via ``os.execvp``.  A
    successful ``run()`` therefore never returns, and handlers registered
    with ``atexit`` in this process are not executed afterwards.
    """

    def __init__(
        self,
        model_path="casperhansen/llama-3.3-70b-instruct-awq",
        adapter_path="artemisiaai/fine-tuned-adapter",
        port=7860,  # Default HuggingFace Spaces port
        host="0.0.0.0",
    ):
        """Store the launch configuration; nothing is started here.

        Args:
            model_path: Value passed to vLLM's ``--model`` option.
            adapter_path: LoRA adapter location, exposed to clients under
                the module name ``adapter`` via ``--lora-modules``.
            port: TCP port passed to ``--port``.
            host: Interface passed to ``--host``.
        """
        self.model_path = model_path
        self.adapter_path = adapter_path
        self.port = port
        self.host = host
        # Never assigned by this class (the server is exec'd, not spawned);
        # kept so _cleanup_server has something to inspect if a caller or a
        # subclass attaches a child process handle.
        self.server_process = None

        # Register cleanup on exit.  Only relevant when the process ends
        # without a successful exec (e.g. run() was never called or failed).
        atexit.register(self._cleanup_server)

    def _build_command(self):
        """Return the argv list used to start the vLLM API server."""
        import sys

        # Prefer the interpreter running this script so that the vLLM
        # installation from the same environment is used; fall back to a
        # PATH lookup when the interpreter path is unavailable.
        python = sys.executable or "python"
        return [
            python, "-m", "vllm.entrypoints.openai.api_server",
            "--model", self.model_path,
            "--host", self.host,
            "--port", str(self.port),
            "--enable-lora",
            "--lora-modules", f"adapter={self.adapter_path}",
            "--max-lora-rank", "64",
            "--tensor-parallel-size", "1",
        ]

    def _start_vllm_server(self):
        """Start vLLM OpenAI API server by replacing the current process.

        Raises:
            OSError: If the interpreter cannot be executed.
        """
        import sys

        cmd = self._build_command()

        print(f"Starting vLLM server with command: {' '.join(cmd)}")
        print(f"API will be available at: http://{self.host}:{self.port}/v1")

        # exec* does not flush open file objects; without this the messages
        # above can be lost when stdout is block-buffered (pipes, log files).
        sys.stdout.flush()
        sys.stderr.flush()

        # Run as main process (not subprocess for HuggingFace Spaces)
        os.execvp(cmd[0], cmd)

    def _cleanup_server(self):
        """Terminate and reap ``server_process`` if one is attached.

        Safe to call more than once: the handle is cleared after the first
        successful cleanup.
        """
        process = self.server_process
        if not process:
            return
        process.terminate()
        process.wait()
        self.server_process = None

    def run(self):
        """Start the vLLM API server; does not return on success."""
        self._start_vllm_server()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
 
 
57
  if __name__ == "__main__":
58
+ server = VllmApiServer()
59
+ server.run()
requirements.txt CHANGED
@@ -1,6 +1,2 @@
1
- transformers
2
- peft
3
- torch
4
- accelerate
5
- jinja2
6
- autoawq
 
1
+ vllm
2
+ torch