import json
from typing import Optional

import requests


class VectaraQuery:
    """Small client for the Vectara v2 chat API that tracks the conversation id across turns."""

    def __init__(self, api_key: str, corpus_keys: list[str], prompt_name: Optional[str] = None):
        self.corpus_keys = corpus_keys
        self.api_key = api_key
        self.prompt_name = prompt_name if prompt_name else "vectara-summary-ext-24-05-sml"
        self.conv_id = None

    def get_body(self, query_str: str, stream: bool):
        """Build the JSON request body for a chat query against the configured corpora."""
        corpora_list = [
            {'corpus_key': corpus_key, 'lexical_interpolation': 0.005}
            for corpus_key in self.corpus_keys
        ]
        return {
            'query': query_str,
            'search': {
                'corpora': corpora_list,
                'offset': 0,
                'limit': 50,
                'context_configuration': {
                    'sentences_before': 2,
                    'sentences_after': 2,
                    'start_tag': "%START_SNIPPET%",
                    'end_tag': "%END_SNIPPET%",
                },
                'reranker': {
                    'type': 'customer_reranker',
                    'reranker_id': 'rnk_272725719'
                },
            },
            'generation': {
                'prompt_name': self.prompt_name,
                'max_used_search_results': 10,
                'response_language': 'eng',
                'citations': {
                    'style': 'none'
                }
            },
            'chat': {
                'store': True
            },
            'stream_response': stream
        }

    def get_headers(self):
        """Headers for regular (non-streaming) requests."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    def get_stream_headers(self):
        """Headers for streaming requests, accepting server-sent events."""
        return {
            "Content-Type": "application/json",
            "Accept": "text/event-stream",
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    def submit_query(self, query_str: str):
        """Send one chat turn and return the complete answer as a string."""
        if self.conv_id:
            endpoint = f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
        else:
            endpoint = "https://api.vectara.io/v2/chats"

        body = self.get_body(query_str, stream=False)
        response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_headers())
        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            if response.status_code == 429:
                return "Sorry, Vectara chat turns exceed the plan limit."
            return "Sorry, something went wrong in my brain. Please try again later."

        res = response.json()
        if self.conv_id is None:
            # First turn: remember the chat id so follow-up turns continue the same conversation.
            self.conv_id = res['chat_id']
        return res['answer']

    def submit_query_streaming(self, query_str: str):
        """Send one chat turn and yield the answer incrementally, chunk by chunk."""
        if self.conv_id:
            endpoint = f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
        else:
            endpoint = "https://api.vectara.io/v2/chats"

        body = self.get_body(query_str, stream=True)
        response = requests.post(endpoint, data=json.dumps(body), verify=True,
                                 headers=self.get_stream_headers(), stream=True)
        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            # This is a generator, so errors must be yielded rather than returned;
            # a plain return would leave the caller's iteration empty.
            if response.status_code == 429:
                yield "Sorry, Vectara chat turns exceed the plan limit."
            else:
                yield "Sorry, something went wrong in my brain. Please try again later."
            return

        chunks = []
        for line in response.iter_lines():
            line = line.decode('utf-8')
            if not line:
                continue  # skip keep-alive new lines
            # Server-sent events look like "data: {...}"; partition avoids a
            # ValueError on lines without a colon.
            key, _, value = line.partition(':')
            if key == 'data':
                event = json.loads(value)
                if event['type'] == 'generation_chunk':
                    chunk = event['generation_chunk']
                    chunks.append(chunk)
                    yield chunk
        return ''.join(chunks)
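

# Minimal usage sketch (illustrative only): the API key and corpus key below are
# placeholders, not real credentials; substitute your own Vectara values.
if __name__ == "__main__":
    vq = VectaraQuery(api_key="YOUR_VECTARA_API_KEY", corpus_keys=["my-corpus"])

    # Blocking query: the whole answer comes back at once.
    print(vq.submit_query("What topics does this corpus cover?"))

    # Streaming query: print chunks as they arrive.
    for chunk in vq.submit_query_streaming("Summarize the main findings."):
        print(chunk, end="", flush=True)
    print()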