File size: 1,692 Bytes
aefc33c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
from helpers import get_credentials
import requests
def hf_inference(prompt, model_id, temperature, max_new_tokens, *, timeout=60):
    """Run a chat-completion request against the Hugging Face router (Together backend).

    Args:
        prompt: User message text sent as a single-turn conversation.
        model_id: Provider model identifier passed through as ``model``.
        temperature: Sampling temperature forwarded to the API.
        max_new_tokens: Maximum number of tokens to generate.
        timeout: Seconds to wait for the HTTP response (keyword-only).

    Returns:
        The first choice's ``message`` dict from the API response
        (contains at least ``role`` and ``content`` keys).

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
    """
    hf_token, _ = get_credentials.get_hf_credentials()
    API_URL = "https://router.huggingface.co/together/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {hf_token}",
    }
    response = requests.post(
        API_URL,
        headers=headers,
        json={
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                    ]
                }
            ],
            "model": model_id,
            "temperature": temperature,
            # The OpenAI-compatible chat-completions API expects "max_tokens";
            # the previous "max_new_tokens" key was not a recognized parameter,
            # so the generation limit was never applied.
            "max_tokens": max_new_tokens,
        },
        # Without a timeout, a stalled connection would block forever.
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of raising a confusing KeyError
    # when indexing the JSON error body below.
    response.raise_for_status()
    return response.json()["choices"][0]["message"]
def replicate_inference(prompt, model_id, temperature, max_new_tokens, *, timeout=300):
    """Run a synchronous prediction against the Replicate models API.

    Args:
        prompt: Prompt text passed in the prediction ``input``.
        model_id: Replicate model path (e.g. ``owner/name``) interpolated
            into the predictions URL.
        temperature: Sampling temperature forwarded as ``input.temperature``.
        max_new_tokens: Token limit forwarded as ``input.max_tokens``.
        timeout: Seconds to wait for the HTTP response (keyword-only).
            Generous default because "Prefer: wait" holds the connection
            open until the prediction finishes.

    Returns:
        A dict with a single ``content`` key: the prediction's ``output``
        chunks joined into one string (matches ``hf_inference``'s
        ``message["content"]`` access pattern for callers).

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
    """
    repl_token = get_credentials.get_replicate_credentials()
    API_URL = f"https://api.replicate.com/v1/models/{model_id}/predictions"
    headers = {
        "Authorization": f"Bearer {repl_token}",
        "Content-Type": "application/json",
        # Blocks until the prediction completes instead of returning a
        # polling URL.
        "Prefer": "wait"
    }
    response = requests.post(
        API_URL,
        headers=headers,
        json={
            "input": {
                "prompt": prompt,
                "temperature": temperature,
                "max_tokens": max_new_tokens,
            }
        },
        # Without a timeout, a stalled connection would block forever.
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of raising a confusing KeyError
    # when indexing the JSON error body below.
    response.raise_for_status()
    # NOTE(review): assumes 'output' is an iterable of string chunks —
    # holds for Replicate language models; a plain string also joins safely.
    return {
        "content": "".join(response.json()["output"])
    }
# Dispatch table mapping a provider name to its inference function; both
# handlers share the (prompt, model_id, temperature, max_new_tokens) signature.
INFERENCE_HANDLER = dict(
    huggingface=hf_inference,
    replicate=replicate_inference,
)