pricing info (#97)
Thomas G. Lopes committed

Files changed:
- package.json +3 -4
- scripts/update-ctx-length.ts +0 -55
- src/lib/components/inference-playground/playground.svelte +3 -2
- src/lib/components/inference-playground/provider-select.svelte +28 -3
- src/lib/data/context_length.json +0 -270
- src/lib/server/providers/cohere.ts +0 -35
- src/lib/server/providers/fireworks.ts +0 -41
- src/lib/server/providers/hyperbolic.ts +0 -41
- src/lib/server/providers/index.ts +0 -224
- src/lib/server/providers/nebius.ts +0 -49
- src/lib/server/providers/novita.ts +0 -46
- src/lib/server/providers/replicate.ts +0 -37
- src/lib/server/providers/sambanova.ts +0 -52
- src/lib/server/providers/together.ts +0 -37
- src/lib/state/conversations.svelte.ts +14 -2
- src/lib/state/pricing.svelte.ts +78 -0
- src/lib/types.ts +1 -0
- src/lib/utils/business.svelte.ts +14 -18
- src/lib/utils/number.ts +9 -0
- src/routes/+page.ts +12 -3

package.json
CHANGED
@@ -3,7 +3,7 @@
 	"version": "0.0.1",
 	"private": true,
 	"scripts": {
-		"dev": "…
+		"dev": "vite dev",
 		"build": "pnpm run update-ctx-length && vite build",
 		"preview": "vite preview",
 		"prepare": "ts-patch install && svelte-kit sync || echo ''",
@@ -12,9 +12,8 @@
 		"lint": "prettier . --check . && eslint src/",
 		"format": "prettier . --write .",
 		"clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'",
-		"…
-		"test…
-		"test": "npm run test:unit -- --run && npm run test:e2e",
+		"test:unit": "vitest --browser.headless",
+		"test": "npm run test:unit",
 		"test:e2e": "playwright test"
 	},
 	"devDependencies": {
scripts/update-ctx-length.ts
DELETED
@@ -1,55 +0,0 @@
-import dotenv from "dotenv";
-dotenv.config(); // Load .env file into process.env
-
-import { fetchAllProviderData, type ApiKeys } from "../src/lib/server/providers/index.js"; // Import ApiKeys type
-import fs from "fs/promises";
-import path from "path";
-
-const CACHE_FILE_PATH = path.resolve("src/lib/data/context_length.json");
-
-async function runUpdate() {
-	console.log("Starting context length cache update...");
-
-	// Gather API keys from process.env
-	const apiKeys: ApiKeys = {
-		COHERE_API_KEY: process.env.COHERE_API_KEY,
-		TOGETHER_API_KEY: process.env.TOGETHER_API_KEY,
-		FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
-		HYPERBOLIC_API_KEY: process.env.HYPERBOLIC_API_KEY,
-		REPLICATE_API_KEY: process.env.REPLICATE_API_KEY,
-		NEBIUS_API_KEY: process.env.NEBIUS_API_KEY,
-		NOVITA_API_KEY: process.env.NOVITA_API_KEY,
-		SAMBANOVA_API_KEY: process.env.SAMBANOVA_API_KEY,
-	};
-
-	try {
-		// Fetch data from all supported providers concurrently, passing keys
-		const fetchedData = await fetchAllProviderData(apiKeys);
-
-		// Read existing manual/cached data
-		let existingData = {};
-		try {
-			const currentCache = await fs.readFile(CACHE_FILE_PATH, "utf-8");
-			existingData = JSON.parse(currentCache);
-		} catch {
-			// Remove unused variable name
-			console.log("No existing cache file found or error reading, creating new one.");
-		}
-
-		// Merge fetched data with existing data (fetched data takes precedence)
-		const combinedData = { ...existingData, ...fetchedData };
-
-		// Write the combined data back to the file
-		const tempFilePath = CACHE_FILE_PATH + ".tmp";
-		await fs.writeFile(tempFilePath, JSON.stringify(combinedData, null, "\t"), "utf-8");
-		await fs.rename(tempFilePath, CACHE_FILE_PATH);
-
-		console.log("Context length cache update complete.");
-		console.log(`Cache file written to: ${CACHE_FILE_PATH}`);
-	} catch (error) {
-		console.error("Error during context length cache update:", error);
-		process.exit(1); // Exit with error code
-	}
-}
-
-runUpdate();
src/lib/components/inference-playground/playground.svelte
CHANGED
@@ -28,6 +28,7 @@
 	import BillingIndicator from "../billing-indicator.svelte";
 	import { TEST_IDS } from "$lib/constants.js";
 	import MessageTextarea from "./message-textarea.svelte";
+	import { atLeastNDecimals } from "$lib/utils/number.js";
 
 	let viewCode = $state(false);
 	let viewSettings = $state(false);
@@ -155,7 +156,7 @@
 	<div
 		class="pointer-events-none absolute inset-0 flex flex-1 shrink-0 items-center justify-around gap-x-8 text-center text-sm text-gray-500 max-xl:hidden"
 	>
-		{#each iterate(conversations.generationStats) as [{ latency, tokens }, isLast]}
+		{#each iterate(conversations.generationStats) as [{ latency, tokens, cost }, isLast]}
 			{@const baLeft = observed["bottom-actions"].rect.left}
 			{@const tceRight = observed["token-count-end"].offset.right}
 			<span
@@ -165,7 +166,7 @@
 					useRaf: true,
 				})}
 			>
-				{tokens} tokens · Latency {latency}ms
+				{tokens} tokens · Latency {latency}ms · Cost ${atLeastNDecimals(cost ?? 0, 1)}
 			</span>
 		{/each}
 	</div>
src/lib/components/inference-playground/provider-select.svelte
CHANGED
@@ -1,6 +1,7 @@
 <script lang="ts">
 	import type { ConversationClass } from "$lib/state/conversations.svelte";
 	import { models } from "$lib/state/models.svelte";
+	import { pricing } from "$lib/state/pricing.svelte";
 	import type { Model } from "$lib/types.js";
 	import { randomPick } from "$lib/utils/array.js";
 	import { cn } from "$lib/utils/cn.js";
@@ -75,6 +76,13 @@
 		if (provider in nameMap) return formatName(provider);
 		return provider === "auto" ? "Auto" : provider;
 	}
+
+	function getProviderPricing(provider: string) {
+		if (provider === "auto") return null;
+		const pd = pricing.getPricing(conversation.model.id, provider);
+		return pricing.formatPricing(pd);
+	}
+	const providerPricing = $derived(getProviderPricing(conversation.data.provider ?? ""));
 </script>
 
 <div class="flex flex-col gap-2">
@@ -92,9 +100,16 @@
 			classes,
 		)}
 	>
-		<div class="flex items-center gap-…
+		<div class="flex items-center gap-2 text-sm">
 			<IconProvider provider={conversation.data.provider} />
-			…
+			<div class="flex flex-col items-start">
+				<span>{getProviderName(conversation.data.provider ?? "") ?? "loading"}</span>
+				{#if providerPricing}
+					<span class="text-xs text-gray-500 dark:text-gray-400">
+						In: {providerPricing.input} • Out: {providerPricing.output}
+					</span>
+				{/if}
+			</div>
 		</div>
 		<div
 			class="absolute right-2 grid size-4 flex-none place-items-center rounded-sm bg-gray-100 text-xs dark:bg-gray-600"
@@ -105,12 +120,22 @@
 
 	<div {...select.content} class="rounded-lg border bg-gray-100 dark:border-gray-700 dark:bg-gray-800">
 		{#snippet option(provider: string)}
+			{@const providerPricing = getProviderPricing(provider)}
 			<div {...select.getOption(provider)} class="group block w-full p-1 text-sm dark:text-white">
 				<div
 					class="flex items-center gap-2 rounded-md px-2 py-1.5 group-data-[highlighted]:bg-gray-200 dark:group-data-[highlighted]:bg-gray-700"
 				>
 					<IconProvider {provider} />
-					…
+					<div class="flex flex-col">
+						<span>{getProviderName(provider)}</span>
+						{#if providerPricing}
+							<div class="flex flex-col">
+								<span class="text-xs text-gray-500 dark:text-gray-400">
+									In: {providerPricing.input} • Out: {providerPricing.output}
+								</span>
+							</div>
+						{/if}
+					</div>
 				</div>
 			</div>
 		{/snippet}
src/lib/data/context_length.json
DELETED
@@ -1,270 +0,0 @@
-{
-	"replicate": {},
-	"sambanova": {
-		"DeepSeek-R1-0528": 32768,
-		"DeepSeek-R1-Distill-Llama-70B": 131072,
-		"DeepSeek-V3-0324": 32768,
-		"E5-Mistral-7B-Instruct": 4096,
-		"Llama-3.3-Swallow-70B-Instruct-v0.4": 131072,
-		"Llama-4-Maverick-17B-128E-Instruct": 131072,
-		"Meta-Llama-3.1-8B-Instruct": 16384,
-		"Meta-Llama-3.3-70B-Instruct": 131072,
-		"Qwen3-32B": 32768,
-		"Whisper-Large-v3": 4096
-	},
-	"nebius": {
-		"meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072,
-		"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
-		"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
-		"meta-llama/Meta-Llama-3.1-405B-Instruct": 131072,
-		"meta-llama/Llama-Guard-3-8B": 131072,
-		"nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072,
-		"mistralai/Mistral-Nemo-Instruct-2407": 128000,
-		"google/gemma-2-2b-it": 8192,
-		"google/gemma-2-9b-it-fast": 8192,
-		"Qwen/Qwen2.5-Coder-7B-fast": 32768,
-		"Qwen/Qwen2.5-Coder-7B": 32768,
-		"Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 131072,
-		"Qwen/Qwen2.5-32B-Instruct-fast": 131072,
-		"Qwen/Qwen2.5-32B-Instruct": 131072,
-		"Qwen/Qwen2.5-72B-Instruct-fast": 131072,
-		"Qwen/Qwen2.5-72B-Instruct": 131072,
-		"Qwen/Qwen2-VL-72B-Instruct": 32768,
-		"aaditya/Llama3-OpenBioLLM-70B": 8192,
-		"BAAI/bge-en-icl": 32768,
-		"BAAI/bge-multilingual-gemma2": 8192,
-		"intfloat/e5-mistral-7b-instruct": 32768,
-		"meta-llama/Llama-3.3-70B-Instruct": 131072,
-		"meta-llama/Llama-3.3-70B-Instruct-fast": 131072,
-		"microsoft/phi-4": 16384,
-		"deepseek-ai/DeepSeek-V3": 163840,
-		"deepseek-ai/DeepSeek-R1": 163840,
-		"deepseek-ai/DeepSeek-R1-0528": 131072,
-		"NousResearch/Hermes-3-Llama-405B": 131072,
-		"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
-		"deepseek-ai/DeepSeek-R1-fast": 163840,
-		"Qwen/QwQ-32B-fast": 131072,
-		"Qwen/QwQ-32B": 131072,
-		"Qwen/Qwen3-235B-A22B": 40960,
-		"Qwen/Qwen3-30B-A3B": 40960,
-		"Qwen/Qwen3-30B-A3B-fast": 40960,
-		"Qwen/Qwen3-32B": 40960,
-		"Qwen/Qwen3-32B-fast": 40960,
-		"Qwen/Qwen3-14B": 40960,
-		"Qwen/Qwen3-4B-fast": 40960,
-		"nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072,
-		"mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072,
-		"mistralai/Devstral-Small-2505": 128000,
-		"google/gemma-3-27b-it": 110000,
-		"google/gemma-3-27b-it-fast": 110000,
-		"Qwen/Qwen2.5-VL-72B-Instruct": 32000,
-		"Qwen/Qwen3-Embedding-8B": 40960,
-		"deepseek-ai/DeepSeek-V3-0324": 163840,
-		"deepseek-ai/DeepSeek-V3-0324-fast": 163840,
-		"black-forest-labs/flux-dev": 0,
-		"black-forest-labs/flux-schnell": 0,
-		"stability-ai/sdxl": 0
-	},
-	"novita": {
-		"deepseek/deepseek-v3-0324": 163840,
-		"moonshotai/kimi-k2-instruct": 131072,
-		"deepseek/deepseek-r1-0528": 163840,
-		"baidu/ernie-4.5-vl-424b-a47b": 123000,
-		"baidu/ernie-4.5-300b-a47b-paddle": 123000,
-		"qwen/qwen3-30b-a3b-fp8": 40960,
-		"minimaxai/minimax-m1-80k": 1000000,
-		"deepseek/deepseek-r1-0528-qwen3-8b": 128000,
-		"qwen/qwen3-32b-fp8": 40960,
-		"qwen/qwen2.5-vl-72b-instruct": 32768,
-		"qwen/qwen3-235b-a22b-fp8": 40960,
-		"deepseek/deepseek-v3-turbo": 64000,
-		"thudm/glm-4.1v-9b-thinking": 65536,
-		"meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576,
-		"google/gemma-3-27b-it": 32000,
-		"deepseek/deepseek-r1-turbo": 64000,
-		"Sao10K/L3-8B-Stheno-v3.2": 8192,
-		"gryphe/mythomax-l2-13b": 4096,
-		"deepseek/deepseek-prover-v2-671b": 160000,
-		"meta-llama/llama-4-scout-17b-16e-instruct": 131072,
-		"deepseek/deepseek-r1-distill-llama-8b": 32000,
-		"meta-llama/llama-3.1-8b-instruct": 16384,
-		"deepseek/deepseek-r1-distill-qwen-14b": 64000,
-		"meta-llama/llama-3.3-70b-instruct": 131072,
-		"qwen/qwen-2.5-72b-instruct": 32000,
-		"mistralai/mistral-nemo": 60288,
-		"deepseek/deepseek-r1-distill-qwen-32b": 64000,
-		"meta-llama/llama-3-8b-instruct": 8192,
-		"microsoft/wizardlm-2-8x22b": 65535,
-		"deepseek/deepseek-r1-distill-llama-70b": 32000,
-		"mistralai/mistral-7b-instruct": 32768,
-		"meta-llama/llama-3-70b-instruct": 8192,
-		"nousresearch/hermes-2-pro-llama-3-8b": 8192,
-		"sao10k/l3-70b-euryale-v2.1": 8192,
-		"cognitivecomputations/dolphin-mixtral-8x22b": 16000,
-		"sophosympatheia/midnight-rose-70b": 4096,
-		"sao10k/l3-8b-lunaris": 8192,
-		"baidu/ernie-4.5-vl-28b-a3b": 30000,
-		"baidu/ernie-4.5-21B-a3b": 120000,
-		"baidu/ernie-4.5-0.3b": 120000,
-		"google/gemma-3-1b-it": 32768,
-		"qwen/qwen3-8b-fp8": 128000,
-		"qwen/qwen3-4b-fp8": 128000,
-		"thudm/glm-4-32b-0414": 32000,
-		"qwen/qwen2.5-7b-instruct": 32000,
-		"meta-llama/llama-3.2-1b-instruct": 131000,
-		"meta-llama/llama-3.2-3b-instruct": 32768,
-		"sao10k/l31-70b-euryale-v2.2": 8192
-	},
-	"fal": {
-		"fal/model-name": 4096
-	},
-	"cerebras": {
-		"cerebras/model-name": 8192
-	},
-	"hf-inference": {
-		"google/gemma-2-9b-it": 8192,
-		"meta-llama/Meta-Llama-3-8B-Instruct": 8192
-	},
-	"hyperbolic": {
-		"Qwen/Qwen2.5-72B-Instruct": 131072,
-		"Qwen/Qwen2.5-VL-72B-Instruct": 32768,
-		"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
-		"deepseek-ai/DeepSeek-V3": 131072,
-		"deepseek-ai/DeepSeek-V3-0324": 163840,
-		"meta-llama/Llama-3.3-70B-Instruct": 131072,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
-		"meta-llama/Llama-3.2-3B-Instruct": 131072,
-		"NousResearch/Hermes-3-Llama-3.1-70B": 12288,
-		"meta-llama/Meta-Llama-3.1-405B-Instruct": 131000,
-		"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
-		"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
-		"mistralai/Pixtral-12B-2409": 32768,
-		"Qwen/Qwen2.5-VL-7B-Instruct": 32768,
-		"meta-llama/Meta-Llama-3.1-405B-FP8": 32768,
-		"deepseek-ai/DeepSeek-R1": 163840,
-		"Qwen/QwQ-32B": 131072
-	},
-	"cohere": {
-		"embed-english-light-v3.0": 512,
-		"embed-multilingual-v2.0": 256,
-		"rerank-v3.5": 4096,
-		"embed-v4.0": 8192,
-		"rerank-english-v3.0": 4096,
-		"command-r-08-2024": 132096,
-		"embed-english-light-v3.0-image": 0,
-		"embed-english-v3.0-image": 0,
-		"command-nightly": 288000,
-		"command-a-03-2025": 288000,
-		"command-r-plus-08-2024": 132096,
-		"c4ai-aya-vision-32b": 16384,
-		"command-r": 132096,
-		"command-r7b-12-2024": 132000,
-		"command-a-vision": 128000,
-		"command-r7b-arabic-02-2025": 128000,
-		"command-light-nightly": 4096,
-		"embed-english-v3.0": 512,
-		"embed-multilingual-light-v3.0-image": 0,
-		"embed-multilingual-v3.0-image": 0,
-		"c4ai-aya-expanse-32b": 128000
-	},
-	"together": {
-		"cartesia/sonic": 0,
-		"black-forest-labs/FLUX.1-kontext-pro": 0,
-		"Alibaba-NLP/gte-modernbert-base": 8192,
-		"mistralai/Mistral-7B-Instruct-v0.3": 32768,
-		"cartesia/sonic-2": 0,
-		"togethercomputer/MoA-1": 32768,
-		"meta-llama/Meta-Llama-Guard-3-8B": 8192,
-		"togethercomputer/m2-bert-80M-32k-retrieval": 32768,
-		"deepseek-ai/DeepSeek-V3": 131072,
-		"moonshotai/Kimi-K2-Instruct": 131072,
-		"Qwen/Qwen2.5-7B-Instruct-Turbo": 32768,
-		"meta-llama/Llama-3-8b-chat-hf": 8192,
-		"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815,
-		"togethercomputer/MoA-1-Turbo": 32768,
-		"eddiehou/meta-llama/Llama-3.1-405B": 12000,
-		"mistralai/Mistral-7B-Instruct-v0.2": 32768,
-		"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072,
-		"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072,
-		"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072,
-		"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072,
-		"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072,
-		"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192,
-		"meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
-		"deepseek-ai/DeepSeek-R1": 163840,
-		"Qwen/Qwen2.5-VL-72B-Instruct": 32768,
-		"google/gemma-3n-E4B-it": 32768,
-		"arcee-ai/AFM-4.5B-Preview": 65536,
-		"lgai/exaone-3-5-32b-instruct": 32768,
-		"meta-llama/Llama-3-70b-chat-hf": 8192,
-		"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
-		"google/gemma-2-27b-it": 8192,
-		"Qwen/Qwen2-72B-Instruct": 32768,
-		"meta-llama/Llama-2-70b-hf": 4096,
-		"Qwen/Qwen3-235B-A22B-fp8-tput": 40960,
-		"Salesforce/Llama-Rank-V1": 8192,
-		"mistralai/Mistral-Small-24B-Instruct-2501": 32768,
-		"Qwen/Qwen2-VL-72B-Instruct": 32768,
-		"mixedbread-ai/Mxbai-Rerank-Large-V2": 32768,
-		"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072,
-		"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768,
-		"meta-llama/Llama-Vision-Free": 131072,
-		"perplexity-ai/r1-1776": 163840,
-		"scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192,
-		"meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
-		"arcee-ai/maestro-reasoning": 131072,
-		"togethercomputer/Refuel-Llm-V2-Small": 8192,
-		"meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 16384,
-		"arcee-ai/coder-large": 32768,
-		"Qwen/QwQ-32B": 131072,
-		"arcee-ai/virtuoso-large": 131072,
-		"arcee_ai/arcee-spotlight": 131072,
-		"arcee-ai/arcee-blitz": 32768,
-		"deepseek-ai/DeepSeek-R1-0528-tput": 163840,
-		"arcee-ai/virtuoso-medium-v2": 131072,
-		"arcee-ai/caller": 32768,
-		"marin-community/marin-8b-instruct": 4096,
-		"lgai/exaone-deep-32b": 32768,
-		"google/gemma-3-27b-it": 65536,
-		"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
-		"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
-		"mistralai/Mistral-7B-Instruct-v0.1": 32768,
-		"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576,
-		"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072,
-		"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
-		"scb10x/scb10x-typhoon-2-1-gemma3-12b": 131072,
-		"meta-llama/Llama-Guard-4-12B": 1048576,
-		"togethercomputer/Refuel-Llm-V2": 16384,
-		"Qwen/Qwen2.5-72B-Instruct-Turbo": 131072,
-		"meta-llama/LlamaGuard-2-8b": 8192,
-		"meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
-		"intfloat/multilingual-e5-large-instruct": 514,
-		"meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576,
-		"yan/deepseek-ai-deepseek-v3": 163839,
-		"black-forest-labs/FLUX.1-kontext-max": 0
-	},
-	"fireworks-ai": {
-		"accounts/fireworks/models/deepseek-r1-0528": 163840,
-		"accounts/perplexity/models/r1-1776": 163840,
-		"accounts/fireworks/models/qwen3-30b-a3b": 131072,
-		"accounts/fireworks/models/llama4-scout-instruct-basic": 10485760,
-		"accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576,
-		"accounts/fireworks/models/llama-v3p1-8b-instruct": 131072,
-		"accounts/fireworks/models/firesearch-ocr-v6": 8192,
-		"accounts/fireworks/models/llama-v3p1-405b-instruct": 131072,
-		"accounts/fireworks/models/mixtral-8x22b-instruct": 65536,
-		"accounts/fireworks/models/deepseek-r1-basic": 163840,
-		"accounts/fireworks/models/kimi-k2-instruct": 131072,
-		"accounts/fireworks/models/llama-v3p1-70b-instruct": 131072,
-		"accounts/fireworks/models/qwen3-235b-a22b": 131072,
-		"accounts/fireworks/models/llama-v3p3-70b-instruct": 131072,
-		"accounts/fireworks/models/deepseek-r1": 163840,
-		"accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072,
-		"accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b": 131072,
-		"accounts/fireworks/models/deepseek-v3": 131072,
-		"accounts/fireworks/models/deepseek-v3-0324": 163840,
-		"accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000
-	}
-}
src/lib/server/providers/cohere.ts
DELETED
@@ -1,35 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const COHERE_API_URL = "https://api.cohere.ai/v1/models";
-
-// Accept apiKey as an argument
-export async function fetchCohereData(apiKey: string | undefined): Promise<MaxTokensCache["cohere"]> {
-	if (!apiKey) {
-		console.warn("Cohere API key not provided. Skipping Cohere fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(COHERE_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`, // Use passed-in apiKey
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Cohere API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any = await response.json();
-		const modelsData: MaxTokensCache["cohere"] = {};
-		if (data?.models && Array.isArray(data.models)) {
-			for (const model of data.models) {
-				if (model.name && typeof model.context_length === "number") {
-					modelsData[model.name] = model.context_length;
-				}
-			}
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Cohere data:", error);
-		return {};
-	}
-}
src/lib/server/providers/fireworks.ts
DELETED
@@ -1,41 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const FIREWORKS_API_URL = "https://api.fireworks.ai/inference/v1/models"; // Assumed
-
-export async function fetchFireworksData(apiKey: string | undefined): Promise<MaxTokensCache["fireworks-ai"]> {
-	if (!apiKey) {
-		console.warn("Fireworks AI API key not provided. Skipping Fireworks AI fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(FIREWORKS_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Fireworks AI API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any = await response.json(); // Assuming OpenAI structure { data: [ { id: string, ... } ] }
-		const modelsData: MaxTokensCache["fireworks-ai"] = {};
-
-		// Check if data and data.data exist and are an array
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				// Check for common context length fields (OpenAI uses context_window)
-				const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
-				// Fireworks uses model.id
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Fireworks AI API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Fireworks AI data:", error);
-		return {}; // Return empty on error
-	}
-}
src/lib/server/providers/hyperbolic.ts
DELETED
@@ -1,41 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/models"; // Assumed
-
-export async function fetchHyperbolicData(apiKey: string | undefined): Promise<MaxTokensCache["hyperbolic"]> {
-	if (!apiKey) {
-		console.warn("Hyperbolic API key not provided. Skipping Hyperbolic fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(HYPERBOLIC_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Hyperbolic API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any = await response.json(); // Assuming OpenAI structure { data: [ { id: string, ... } ] }
-		const modelsData: MaxTokensCache["hyperbolic"] = {};
-
-		// Check if data and data.data exist and are an array
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				// Check for common context length fields (OpenAI uses context_window)
-				const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
-				// Assuming Hyperbolic uses model.id
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Hyperbolic API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Hyperbolic data:", error);
-		return {}; // Return empty on error
-	}
-}
src/lib/server/providers/index.ts
DELETED
@@ -1,224 +0,0 @@
-import fs from "fs/promises";
-import path from "path";
-import { fetchCohereData } from "./cohere.js";
-import { fetchTogetherData } from "./together.js";
-import { fetchFireworksData } from "./fireworks.js";
-import { fetchHyperbolicData } from "./hyperbolic.js";
-import { fetchReplicateData } from "./replicate.js";
-import { fetchNebiusData } from "./nebius.js";
-import { fetchNovitaData } from "./novita.js";
-import { fetchSambanovaData } from "./sambanova.js";
-
-// --- Constants ---
-const CACHE_FILE_PATH = path.resolve("src/lib/server/data/context_length.json");
-
-// --- Types ---
-export interface MaxTokensCache {
-	[provider: string]: {
-		[modelId: string]: number;
-	};
-}
-
-// Type for API keys object passed to fetchAllProviderData
-export interface ApiKeys {
-	COHERE_API_KEY?: string;
-	TOGETHER_API_KEY?: string;
-	FIREWORKS_API_KEY?: string;
-	HYPERBOLIC_API_KEY?: string;
-	REPLICATE_API_KEY?: string;
-	NEBIUS_API_KEY?: string;
-	NOVITA_API_KEY?: string;
-	SAMBANOVA_API_KEY?: string;
-}
-
-// --- Cache Handling ---
-// (readCache and updateCache remain the same)
-let memoryCache: MaxTokensCache | null = null;
-let cacheReadPromise: Promise<MaxTokensCache> | null = null;
-
-async function readCache(): Promise<MaxTokensCache> {
-	if (memoryCache) {
-		return memoryCache;
-	}
-	if (cacheReadPromise) {
-		return cacheReadPromise;
-	}
-	cacheReadPromise = (async () => {
-		try {
-			const data = await fs.readFile(CACHE_FILE_PATH, "utf-8");
-			memoryCache = JSON.parse(data) as MaxTokensCache;
-			return memoryCache!;
-		} catch (error: unknown) {
-			if (typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT") {
-				console.warn(`Cache file not found at ${CACHE_FILE_PATH}, starting with empty cache.`);
-				memoryCache = {};
-				return {};
-			}
-			console.error("Error reading context length cache file:", error);
-			memoryCache = {};
-			return {};
-		} finally {
-			cacheReadPromise = null;
-		}
-	})();
-	return cacheReadPromise;
-}
-
-const isBrowser = typeof window !== "undefined";
-
-function serverLog(...txt: unknown[]) {
-	if (isBrowser) return;
-	console.log(...txt);
-}
-
-function serverError(...txt: unknown[]) {
-	if (isBrowser) return;
-	console.error(...txt);
-}
-
-async function updateCache(provider: string, modelId: string, maxTokens: number): Promise<void> {
-	try {
-		let cache: MaxTokensCache;
-		try {
-			const data = await fs.readFile(CACHE_FILE_PATH, "utf-8");
-			cache = JSON.parse(data) as MaxTokensCache;
-		} catch (readError: unknown) {
-			if (typeof readError === "object" && readError !== null && "code" in readError && readError.code === "ENOENT") {
-				cache = {};
-			} else {
-				throw readError;
-			}
-		}
-		if (!cache[provider]) {
-			cache[provider] = {};
-		}
-		cache[provider][modelId] = maxTokens;
-		const tempFilePath = CACHE_FILE_PATH + ".tmp";
-		await fs.writeFile(tempFilePath, JSON.stringify(cache, null, "\t"), "utf-8");
-		await fs.rename(tempFilePath, CACHE_FILE_PATH);
-		memoryCache = cache;
-		serverLog(`Cache updated for ${provider} - ${modelId}: ${maxTokens}`);
-	} catch (error) {
-		serverError(`Error updating context length cache for ${provider} - ${modelId}:`, error);
-		memoryCache = null;
-	}
-}
-
-// --- Main Exported Function ---
-// Now accepts apiKey as the third argument
-export async function getMaxTokens(
-	provider: string,
-	modelId: string,
-	apiKey: string | undefined,
-): Promise<number | null> {
-	const cache = await readCache();
-	const cachedValue = cache[provider]?.[modelId];
-
-	if (cachedValue !== undefined) {
-		return cachedValue;
-	}
-
-	serverLog(`Cache miss for ${provider} - ${modelId}. Attempting live fetch...`);
-
-	let liveData: number | null = null;
-	let fetchedProviderData: MaxTokensCache[string] | null = null;
-
-	try {
-		// Pass the received apiKey to the fetcher functions
-		switch (provider) {
-			case "cohere":
-				fetchedProviderData = await fetchCohereData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "together":
-				fetchedProviderData = await fetchTogetherData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "fireworks-ai":
-				fetchedProviderData = await fetchFireworksData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "hyperbolic":
-				fetchedProviderData = await fetchHyperbolicData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "replicate":
-				fetchedProviderData = await fetchReplicateData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "nebius":
-				fetchedProviderData = await fetchNebiusData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "novita":
-				fetchedProviderData = await fetchNovitaData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "sambanova":
-				fetchedProviderData = await fetchSambanovaData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			default:
-				serverLog(`Live fetch not supported or implemented for provider: ${provider}`);
-				return null;
-		}
-
-		if (liveData !== null) {
-			serverLog(`Live fetch successful for ${provider} - ${modelId}: ${liveData}`);
-			updateCache(provider, modelId, liveData).catch(err => {
-				serverError(`Async cache update failed for ${provider} - ${modelId}:`, err);
-			});
-			return liveData;
-		} else {
-			serverLog(`Live fetch for ${provider} did not return data for model ${modelId}.`);
-			return null;
-		}
-	} catch (error) {
-		serverError(`Error during live fetch for ${provider} - ${modelId}:`, error);
-		return null;
-	}
-}
-
-// --- Helper for Build Script ---
-// Now accepts an apiKeys object
-export async function fetchAllProviderData(apiKeys: ApiKeys): Promise<MaxTokensCache> {
-	serverLog("Fetching data for all providers...");
-	const results: MaxTokensCache = {};
-
-	// Define fetchers, passing the specific key from the apiKeys object
-	const providerFetchers = [
-		{ name: "cohere", fetcher: () => fetchCohereData(apiKeys.COHERE_API_KEY) },
-		{ name: "together", fetcher: () => fetchTogetherData(apiKeys.TOGETHER_API_KEY) },
-		{ name: "fireworks-ai", fetcher: () => fetchFireworksData(apiKeys.FIREWORKS_API_KEY) },
-		{ name: "hyperbolic", fetcher: () => fetchHyperbolicData(apiKeys.HYPERBOLIC_API_KEY) },
-		{ name: "replicate", fetcher: () => fetchReplicateData(apiKeys.REPLICATE_API_KEY) },
-		{ name: "nebius", fetcher: () => fetchNebiusData(apiKeys.NEBIUS_API_KEY) },
-		{ name: "novita", fetcher: () => fetchNovitaData(apiKeys.NOVITA_API_KEY) },
-		{ name: "sambanova", fetcher: () => fetchSambanovaData(apiKeys.SAMBANOVA_API_KEY) },
-	];
-
-	const settledResults = await Promise.allSettled(providerFetchers.map(p => p.fetcher()));
-
-	settledResults.forEach((result, index) => {
-		const providerInfo = providerFetchers[index];
-		if (!providerInfo) {
-			serverError(`Error: No provider info found for index ${index}`);
-			return;
-		}
-		const providerName = providerInfo.name;
-
-		if (result.status === "fulfilled" && result.value) {
-			if (Object.keys(result.value).length > 0) {
-				results[providerName] = result.value;
-				serverLog(`Successfully fetched data for ${providerName}`);
-			} else {
-				serverLog(`No data returned for ${providerName}.`);
-			}
-		} else if (result.status === "rejected") {
-			serverError(`Error fetching ${providerName} data:`, result.reason);
-		}
-	});
-
-	serverLog("Finished fetching provider data.");
-	return results;
-}
src/lib/server/providers/nebius.ts
DELETED
@@ -1,49 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-interface NebiusModel {
-	id: string;
-	config?: {
-		max_tokens?: number;
-	};
-	context_length?: number;
-}
-
-interface NebiusResponse {
-	data?: NebiusModel[];
-}
-
-const NEBIUS_API_URL = "https://api.studio.nebius.com/v1/models?verbose=true";
-
-export async function fetchNebiusData(apiKey: string | undefined): Promise<MaxTokensCache["nebius"]> {
-	if (!apiKey) {
-		console.warn("Nebius API key not provided. Skipping Nebius fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(NEBIUS_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Nebius API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data: NebiusResponse = await response.json();
-		const modelsData: MaxTokensCache["nebius"] = {};
-
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Nebius API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Nebius data:", error);
-		return {};
-	}
-}
src/lib/server/providers/novita.ts
DELETED
@@ -1,46 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const NOVITA_API_URL = "https://api.novita.ai/v3/openai/models";
-
-interface NovitaModel {
-	id: string;
-	object: string;
-	context_size: number;
-}
-
-interface NovitaResponse {
-	data: NovitaModel[];
-}
-
-export async function fetchNovitaData(apiKey: string | undefined): Promise<MaxTokensCache["novita"]> {
-	if (!apiKey) {
-		console.warn("Novita API key not provided. Skipping Novita fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(NOVITA_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Novita API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data: NovitaResponse = await response.json();
-		const modelsData: MaxTokensCache["novita"] = {};
-
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				if (model.id && typeof model.context_size === "number") {
-					modelsData[model.id] = model.context_size;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Novita API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Novita data:", error);
-		return {};
-	}
-}
src/lib/server/providers/replicate.ts
DELETED
@@ -1,37 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const REPLICATE_API_URL = "https://api.replicate.com/v1/models";
-
-export async function fetchReplicateData(apiKey: string | undefined): Promise<MaxTokensCache["replicate"]> {
-	if (!apiKey) {
-		console.warn("Replicate API key not provided. Skipping Replicate fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(REPLICATE_API_URL, {
-			headers: {
-				Authorization: `Token ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Replicate API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data = await response.json();
-		const modelsData: MaxTokensCache["replicate"] = {};
-
-		if (data?.results && Array.isArray(data.results)) {
-			for (const model of data.results) {
-				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Replicate API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Replicate data:", error);
-		return {};
-	}
-}
src/lib/server/providers/sambanova.ts
DELETED
@@ -1,52 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const SAMBANOVA_API_URL = "https://api.sambanova.ai/v1/models";
-
-interface SambanovaModel {
-	id: string;
-	object: string;
-	context_length: number;
-	max_completion_tokens?: number;
-	pricing?: {
-		prompt: string;
-		completion: string;
-	};
-}
-
-interface SambanovaResponse {
-	data: SambanovaModel[];
-	object: string;
-}
-
-export async function fetchSambanovaData(apiKey: string | undefined): Promise<MaxTokensCache["sambanova"]> {
-	if (!apiKey) {
-		console.warn("SambaNova API key not provided. Skipping SambaNova fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(SAMBANOVA_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`SambaNova API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data: SambanovaResponse = await response.json();
-		const modelsData: MaxTokensCache["sambanova"] = {};
-
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				if (model.id && typeof model.context_length === "number") {
-					modelsData[model.id] = model.context_length;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from SambaNova API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching SambaNova data:", error);
-		return {};
-	}
-}
src/lib/server/providers/together.ts
DELETED
@@ -1,37 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const TOGETHER_API_URL = "https://api.together.xyz/v1/models";
-
-// Accept apiKey as an argument
-export async function fetchTogetherData(apiKey: string | undefined): Promise<MaxTokensCache["together"]> {
-	if (!apiKey) {
-		console.warn("Together AI API key not provided. Skipping Together AI fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(TOGETHER_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`, // Use passed-in apiKey
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Together AI API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any[] = await response.json();
-		const modelsData: MaxTokensCache["together"] = {};
-
-		if (Array.isArray(data)) {
-			for (const model of data) {
-				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Together AI data:", error);
-		return {};
-	}
-}
src/lib/state/conversations.svelte.ts
CHANGED
@@ -5,9 +5,10 @@ import {
 import { addToast } from "$lib/components/toaster.svelte.js";
 import { AbortManager } from "$lib/spells/abort-manager.svelte";
 import { PipelineTag, Provider, type ConversationMessage, type GenerationStatistics, type Model } from "$lib/types.js";
-import { handleNonStreamingResponse, handleStreamingResponse } from "$lib/utils/business.svelte.js";
+import { handleNonStreamingResponse, handleStreamingResponse, estimateTokens } from "$lib/utils/business.svelte.js";
 import { omit, snapshot } from "$lib/utils/object.svelte";
 import { models, structuredForbiddenProviders } from "./models.svelte";
+import { pricing } from "./pricing.svelte.js";
 import { DEFAULT_PROJECT_ID, ProjectEntity, projects } from "./projects.svelte";
 import { token } from "./token.svelte";
 // eslint-disable-next-line @typescript-eslint/ban-ts-comment
@@ -87,7 +88,7 @@ export class ConversationClass {
 	readonly model = $derived(models.all.find(m => m.id === this.data.modelId) ?? emptyModel);
 
 	abortManager = new AbortManager();
-	generationStats = $state({ latency: 0, tokens: 0 }) as GenerationStatistics;
+	generationStats = $state({ latency: 0, tokens: 0, cost: 0 }) as GenerationStatistics;
 	generating = $state(false);
 
 	constructor(data: ConversationEntityMembers) {
@@ -232,6 +233,17 @@ export class ConversationClass {
 
 		const endTime = performance.now();
 		this.generationStats.latency = Math.round(endTime - startTime);
+
+		// Calculate cost if we have pricing data
+		if (this.data.provider && this.data.provider !== "auto") {
+			const inputTokens = estimateTokens(this);
+			const outputTokens = this.generationStats.tokens;
+			const costEstimate = pricing.estimateCost(this.model.id, this.data.provider, inputTokens, outputTokens);
+			if (costEstimate) {
+				this.generationStats.cost = costEstimate.total;
+			}
+		}
+
 		this.generating = false;
 	};
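
Note on the new cost field: router prices are USD per 1M tokens, so the value written to generationStats.cost is a linear combination of the input and output token counts. A standalone sketch of the same arithmetic (the rates and counts below are made up for illustration; real rates come from pricing.getPricing):

	// Hypothetical per-1M-token rates, stand-ins for router pricing data.
	const rate = { input: 0.9, output: 0.9 }; // USD per 1M tokens
	const inputTokens = 1200; // what estimateTokens() might return for a prompt
	const outputTokens = 350; // what generationStats.tokens might report
	const total = (inputTokens / 1_000_000) * rate.input + (outputTokens / 1_000_000) * rate.output;
	console.log(total.toFixed(6)); // "0.001395"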
src/lib/state/pricing.svelte.ts
ADDED
@@ -0,0 +1,78 @@
+import { page } from "$app/state";
+import { atLeastNDecimals } from "$lib/utils/number.js";
+import type { PageData } from "../../routes/$types.js";
+
+interface RouterProvider {
+	provider: string;
+	status: string;
+	context_length?: number;
+	pricing?: {
+		input: number;
+		output: number;
+	};
+	supports_tools?: boolean;
+	supports_structured_output?: boolean;
+}
+
+interface RouterModel {
+	id: string;
+	providers: RouterProvider[];
+}
+
+interface RouterData {
+	data: RouterModel[];
+}
+
+const pageData = $derived(page.data as PageData & { routerData: RouterData });
+
+class Pricing {
+	routerData = $derived(pageData.routerData as RouterData);
+
+	getPricing(modelId: string, provider: string) {
+		const model = this.routerData?.data?.find((m: RouterModel) => m.id === modelId);
+		if (!model) return null;
+
+		const providerData = model.providers.find((p: RouterProvider) => p.provider === provider);
+		return providerData?.pricing || null;
+	}
+
+	getContextLength(modelId: string, provider: string) {
+		const model = this.routerData?.data?.find((m: RouterModel) => m.id === modelId);
+		if (!model) return null;
+
+		const providerData = model.providers.find((p: RouterProvider) => p.provider === provider);
+		return providerData?.context_length || null;
+	}
+
+	formatPricing(pricing: { input: number; output: number } | null) {
+		if (!pricing) return null;
+
+		const inputCost = atLeastNDecimals(pricing.input, 2);
+		const outputCost = atLeastNDecimals(pricing.output, 2);
+
+		return {
+			input: `$${inputCost}/1M`,
+			output: `$${outputCost}/1M`,
+			inputRaw: pricing.input,
+			outputRaw: pricing.output,
+		};
+	}
+
+	estimateCost(modelId: string, provider: string, inputTokens: number, outputTokens: number = 0) {
+		const pricing = this.getPricing(modelId, provider);
+		if (!pricing) return null;
+
+		const inputCost = (inputTokens / 1000000) * pricing.input;
+		const outputCost = (outputTokens / 1000000) * pricing.output;
+		const totalCost = inputCost + outputCost;
+
+		return {
+			input: inputCost,
+			output: outputCost,
+			total: totalCost,
+			formatted: `$${totalCost.toFixed(6)}`,
+		};
+	}
+}
+
+export const pricing = new Pricing();
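
For context, typical call sites of the exported pricing singleton look like the following sketch. The model and provider ids are examples, the rates in the comments are hypothetical, and the actual numbers depend on live router data:

	// getPricing returns { input, output } in USD per 1M tokens, or null if unknown.
	const p = pricing.getPricing("meta-llama/Llama-3.3-70B-Instruct", "nebius");
	// formatPricing turns that into display strings like "$0.25/1M".
	const label = pricing.formatPricing(p);
	// estimateCost scales the per-1M rates by actual token counts.
	const cost = pricing.estimateCost("meta-llama/Llama-3.3-70B-Instruct", "nebius", 1000, 500);
	if (cost) console.log(cost.formatted); // "$0.000375" if both rates were 0.25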
src/lib/types.ts
CHANGED
@@ -197,6 +197,7 @@ export type ValueOf<T> = T[keyof T];
 export interface GenerationStatistics {
 	latency: number;
 	tokens: number;
+	cost?: number;
 }
 
 export type ModelsJson = {
src/lib/utils/business.svelte.ts
CHANGED
@@ -6,7 +6,7 @@
  *
 **/
 
-import …
+import { pricing } from "$lib/state/pricing.svelte.js";
 import { InferenceClient, snippets } from "@huggingface/inference";
 import { ConversationClass, type ConversationEntityMembers } from "$lib/state/conversations.svelte";
 import { token } from "$lib/state/token.svelte";
@@ -21,7 +21,7 @@ import {
 	type Model,
 } from "$lib/types.js";
 import { safeParse } from "$lib/utils/json.js";
-import { omit…
+import { omit } from "$lib/utils/object.svelte.js";
 import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
 import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
 import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
@@ -71,20 +71,15 @@
 type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
 
 export function maxAllowedTokens(conversation: ConversationClass) {
-	const …
-
-	const { provider } = conversation.data;
-
-	if (!provider || !isHFModel(model)) return;
-
-	const idOnProvider = model.inferenceProviderMapping.find(data => data.provider === provider)?.providerId;
-	if (!idOnProvider) return;
+	const model = conversation.model;
+	const { provider } = conversation.data;
 
-	…
+	if (!provider || !isHFModel(model)) {
+		return customMaxTokens[conversation.model.id] ?? 100000;
+	}
 
+	// Try to get context length from router data
+	const ctxLength = pricing.getContextLength(model.id, provider);
 
 	if (!ctxLength) return customMaxTokens[conversation.model.id] ?? 100000;
 	return ctxLength;
@@ -387,15 +382,16 @@
 }
 
 // When you don't have access to a tokenizer, guesstimate
-export function estimateTokens(conversation: …
-	…
+export function estimateTokens(conversation: ConversationClass) {
+	if (!conversation.data.messages) return 0;
+	const content = conversation.data.messages?.reduce((acc, curr) => {
 		return acc + (curr?.content ?? "");
 	}, "");
 
 	return content.length / 4; // 1 token ~ 4 characters
 }
 
-export async function getTokens(conversation: …
+export async function getTokens(conversation: ConversationClass): Promise<number> {
 	const model = conversation.model;
 	if (isCustomModel(model)) return estimateTokens(conversation);
 	const tokenizer = await getTokenizer(model);
@@ -404,7 +400,7 @@ export async function getTokens(conversation: Conversation): Promise<number> {
 	// This is a simplified version - you might need to adjust based on your exact needs
 	let formattedText = "";
 
-	conversation.messages…
+	conversation.data.messages?.forEach((message, index) => {
 		let content = `<|start_header_id|>${message.role}<|end_header_id|>\n\n${message.content?.trim()}<|eot_id|>`;
 
 		// Add BOS token to the first message
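
The rewritten estimateTokens keeps the rough 1 token ~ 4 characters heuristic. A quick sanity check of what that yields (illustrative only):

	// Quick check of the 4-characters-per-token guesstimate.
	const text = "The quick brown fox jumps over the lazy dog."; // 44 characters
	console.log(text.length / 4); // 11, the fallback estimate when no tokenizer is available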
src/lib/utils/number.ts
ADDED
@@ -0,0 +1,9 @@
+export function atLeastNDecimals(num: number, minDecimals: number): string {
+	return num.toFixed(Math.max(minDecimals, getDecimalPlaces(num)));
+}
+
+function getDecimalPlaces(num: number): number {
+	const str = num.toString();
+	const decimalIndex = str.indexOf(".");
+	return decimalIndex === -1 ? 0 : str.length - decimalIndex - 1;
+}
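
The helper pads to at least minDecimals places but never truncates precision a number already has, e.g.:

	atLeastNDecimals(3, 2);     // "3.00"  (padded up to the minimum)
	atLeastNDecimals(0.125, 2); // "0.125" (existing three decimals kept)
	atLeastNDecimals(1.5, 1);   // "1.5"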
src/routes/+page.ts
CHANGED
@@ -2,7 +2,16 @@ import type { PageLoad } from "./$types.js";
 import type { ApiModelsResponse } from "./api/models/+server.js";
 
 export const load: PageLoad = async ({ fetch }) => {
-	const …
-	…
-	…
+	const [modelsRes, routerRes] = await Promise.all([
+		fetch("/api/models"),
+		fetch("https://router.huggingface.co/v1/models"),
+	]);
+
+	const models: ApiModelsResponse = await modelsRes.json();
+	const routerData = await routerRes.json();
+
+	return {
+		...models,
+		routerData,
+	};
 };