Thomas G. Lopes committed
Commit 1979653 · unverified · 1 Parent(s): 9889bc5

pricing info (#97)
package.json CHANGED
@@ -3,7 +3,7 @@
 	"version": "0.0.1",
 	"private": true,
 	"scripts": {
-		"dev": "pnpm run update-ctx-length && vite dev",
+		"dev": "vite dev",
 		"build": "pnpm run update-ctx-length && vite build",
 		"preview": "vite preview",
 		"prepare": "ts-patch install && svelte-kit sync || echo ''",
@@ -12,9 +12,8 @@
 		"lint": "prettier . --check . && eslint src/",
 		"format": "prettier . --write .",
 		"clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'",
-		"update-ctx-length": "jiti scripts/update-ctx-length.ts",
-		"test:unit": "vitest",
-		"test": "npm run test:unit -- --run && npm run test:e2e",
+		"test:unit": "vitest --browser.headless",
+		"test": "npm run test:unit",
 		"test:e2e": "playwright test"
 	},
 	"devDependencies": {
scripts/update-ctx-length.ts DELETED
@@ -1,55 +0,0 @@
-import dotenv from "dotenv";
-dotenv.config(); // Load .env file into process.env
-
-import { fetchAllProviderData, type ApiKeys } from "../src/lib/server/providers/index.js"; // Import ApiKeys type
-import fs from "fs/promises";
-import path from "path";
-
-const CACHE_FILE_PATH = path.resolve("src/lib/data/context_length.json");
-
-async function runUpdate() {
-	console.log("Starting context length cache update...");
-
-	// Gather API keys from process.env
-	const apiKeys: ApiKeys = {
-		COHERE_API_KEY: process.env.COHERE_API_KEY,
-		TOGETHER_API_KEY: process.env.TOGETHER_API_KEY,
-		FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
-		HYPERBOLIC_API_KEY: process.env.HYPERBOLIC_API_KEY,
-		REPLICATE_API_KEY: process.env.REPLICATE_API_KEY,
-		NEBIUS_API_KEY: process.env.NEBIUS_API_KEY,
-		NOVITA_API_KEY: process.env.NOVITA_API_KEY,
-		SAMBANOVA_API_KEY: process.env.SAMBANOVA_API_KEY,
-	};
-
-	try {
-		// Fetch data from all supported providers concurrently, passing keys
-		const fetchedData = await fetchAllProviderData(apiKeys);
-
-		// Read existing manual/cached data
-		let existingData = {};
-		try {
-			const currentCache = await fs.readFile(CACHE_FILE_PATH, "utf-8");
-			existingData = JSON.parse(currentCache);
-		} catch {
-			// Remove unused variable name
-			console.log("No existing cache file found or error reading, creating new one.");
-		}
-
-		// Merge fetched data with existing data (fetched data takes precedence)
-		const combinedData = { ...existingData, ...fetchedData };
-
-		// Write the combined data back to the file
-		const tempFilePath = CACHE_FILE_PATH + ".tmp";
-		await fs.writeFile(tempFilePath, JSON.stringify(combinedData, null, "\t"), "utf-8");
-		await fs.rename(tempFilePath, CACHE_FILE_PATH);
-
-		console.log("Context length cache update complete.");
-		console.log(`Cache file written to: ${CACHE_FILE_PATH}`);
-	} catch (error) {
-		console.error("Error during context length cache update:", error);
-		process.exit(1); // Exit with error code
-	}
-}
-
-runUpdate();
src/lib/components/inference-playground/playground.svelte CHANGED
@@ -28,6 +28,7 @@
 	import BillingIndicator from "../billing-indicator.svelte";
 	import { TEST_IDS } from "$lib/constants.js";
 	import MessageTextarea from "./message-textarea.svelte";
+	import { atLeastNDecimals } from "$lib/utils/number.js";
 
 	let viewCode = $state(false);
 	let viewSettings = $state(false);
@@ -155,7 +156,7 @@
 	<div
 		class="pointer-events-none absolute inset-0 flex flex-1 shrink-0 items-center justify-around gap-x-8 text-center text-sm text-gray-500 max-xl:hidden"
 	>
-		{#each iterate(conversations.generationStats) as [{ latency, tokens }, isLast]}
+		{#each iterate(conversations.generationStats) as [{ latency, tokens, cost }, isLast]}
 			{@const baLeft = observed["bottom-actions"].rect.left}
 			{@const tceRight = observed["token-count-end"].offset.right}
 			<span
@@ -165,7 +166,7 @@
 					useRaf: true,
 				})}
 			>
-				{tokens} tokens · Latency {latency}ms
+				{tokens} tokens · Latency {latency}ms · Cost ${atLeastNDecimals(cost ?? 0, 1)}
 			</span>
 		{/each}
 	</div>
src/lib/components/inference-playground/provider-select.svelte CHANGED
@@ -1,6 +1,7 @@
 <script lang="ts">
 	import type { ConversationClass } from "$lib/state/conversations.svelte";
 	import { models } from "$lib/state/models.svelte";
+	import { pricing } from "$lib/state/pricing.svelte";
 	import type { Model } from "$lib/types.js";
 	import { randomPick } from "$lib/utils/array.js";
 	import { cn } from "$lib/utils/cn.js";
@@ -75,6 +76,13 @@
 		if (provider in nameMap) return formatName(provider);
 		return provider === "auto" ? "Auto" : provider;
 	}
+
+	function getProviderPricing(provider: string) {
+		if (provider === "auto") return null;
+		const pd = pricing.getPricing(conversation.model.id, provider);
+		return pricing.formatPricing(pd);
+	}
+	const providerPricing = $derived(getProviderPricing(conversation.data.provider ?? ""));
 </script>

 <div class="flex flex-col gap-2">
@@ -92,9 +100,16 @@
 			classes,
 		)}
 	>
-		<div class="flex items-center gap-1 text-sm">
+		<div class="flex items-center gap-2 text-sm">
 			<IconProvider provider={conversation.data.provider} />
-			{getProviderName(conversation.data.provider ?? "") ?? "loading"}
+			<div class="flex flex-col items-start">
+				<span>{getProviderName(conversation.data.provider ?? "") ?? "loading"}</span>
+				{#if providerPricing}
+					<span class="text-xs text-gray-500 dark:text-gray-400">
+						In: {providerPricing.input} • Out: {providerPricing.output}
+					</span>
+				{/if}
+			</div>
 		</div>
 		<div
 			class="absolute right-2 grid size-4 flex-none place-items-center rounded-sm bg-gray-100 text-xs dark:bg-gray-600"
@@ -105,12 +120,22 @@

 	<div {...select.content} class="rounded-lg border bg-gray-100 dark:border-gray-700 dark:bg-gray-800">
 		{#snippet option(provider: string)}
+			{@const providerPricing = getProviderPricing(provider)}
 			<div {...select.getOption(provider)} class="group block w-full p-1 text-sm dark:text-white">
 				<div
 					class="flex items-center gap-2 rounded-md px-2 py-1.5 group-data-[highlighted]:bg-gray-200 dark:group-data-[highlighted]:bg-gray-700"
 				>
 					<IconProvider {provider} />
-					{getProviderName(provider)}
+					<div class="flex flex-col">
+						<span>{getProviderName(provider)}</span>
+						{#if providerPricing}
+							<div class="flex flex-col">
+								<span class="text-xs text-gray-500 dark:text-gray-400">
+									In: {providerPricing.input} • Out: {providerPricing.output}
+								</span>
+							</div>
+						{/if}
+					</div>
 				</div>
 			</div>
 		{/snippet}
src/lib/data/context_length.json DELETED
@@ -1,270 +0,0 @@
-{
-	"replicate": {},
-	"sambanova": {
-		"DeepSeek-R1-0528": 32768,
-		"DeepSeek-R1-Distill-Llama-70B": 131072,
-		"DeepSeek-V3-0324": 32768,
-		"E5-Mistral-7B-Instruct": 4096,
-		"Llama-3.3-Swallow-70B-Instruct-v0.4": 131072,
-		"Llama-4-Maverick-17B-128E-Instruct": 131072,
-		"Meta-Llama-3.1-8B-Instruct": 16384,
-		"Meta-Llama-3.3-70B-Instruct": 131072,
-		"Qwen3-32B": 32768,
-		"Whisper-Large-v3": 4096
-	},
-	"nebius": {
-		"meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072,
-		"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
-		"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
-		"meta-llama/Meta-Llama-3.1-405B-Instruct": 131072,
-		"meta-llama/Llama-Guard-3-8B": 131072,
-		"nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072,
-		"mistralai/Mistral-Nemo-Instruct-2407": 128000,
-		"google/gemma-2-2b-it": 8192,
-		"google/gemma-2-9b-it-fast": 8192,
-		"Qwen/Qwen2.5-Coder-7B-fast": 32768,
-		"Qwen/Qwen2.5-Coder-7B": 32768,
-		"Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 131072,
-		"Qwen/Qwen2.5-32B-Instruct-fast": 131072,
-		"Qwen/Qwen2.5-32B-Instruct": 131072,
-		"Qwen/Qwen2.5-72B-Instruct-fast": 131072,
-		"Qwen/Qwen2.5-72B-Instruct": 131072,
-		"Qwen/Qwen2-VL-72B-Instruct": 32768,
-		"aaditya/Llama3-OpenBioLLM-70B": 8192,
-		"BAAI/bge-en-icl": 32768,
-		"BAAI/bge-multilingual-gemma2": 8192,
-		"intfloat/e5-mistral-7b-instruct": 32768,
-		"meta-llama/Llama-3.3-70B-Instruct": 131072,
-		"meta-llama/Llama-3.3-70B-Instruct-fast": 131072,
-		"microsoft/phi-4": 16384,
-		"deepseek-ai/DeepSeek-V3": 163840,
-		"deepseek-ai/DeepSeek-R1": 163840,
-		"deepseek-ai/DeepSeek-R1-0528": 131072,
-		"NousResearch/Hermes-3-Llama-405B": 131072,
-		"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
-		"deepseek-ai/DeepSeek-R1-fast": 163840,
-		"Qwen/QwQ-32B-fast": 131072,
-		"Qwen/QwQ-32B": 131072,
-		"Qwen/Qwen3-235B-A22B": 40960,
-		"Qwen/Qwen3-30B-A3B": 40960,
-		"Qwen/Qwen3-30B-A3B-fast": 40960,
-		"Qwen/Qwen3-32B": 40960,
-		"Qwen/Qwen3-32B-fast": 40960,
-		"Qwen/Qwen3-14B": 40960,
-		"Qwen/Qwen3-4B-fast": 40960,
-		"nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072,
-		"mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072,
-		"mistralai/Devstral-Small-2505": 128000,
-		"google/gemma-3-27b-it": 110000,
-		"google/gemma-3-27b-it-fast": 110000,
-		"Qwen/Qwen2.5-VL-72B-Instruct": 32000,
-		"Qwen/Qwen3-Embedding-8B": 40960,
-		"deepseek-ai/DeepSeek-V3-0324": 163840,
-		"deepseek-ai/DeepSeek-V3-0324-fast": 163840,
-		"black-forest-labs/flux-dev": 0,
-		"black-forest-labs/flux-schnell": 0,
-		"stability-ai/sdxl": 0
-	},
-	"novita": {
-		"deepseek/deepseek-v3-0324": 163840,
-		"moonshotai/kimi-k2-instruct": 131072,
-		"deepseek/deepseek-r1-0528": 163840,
-		"baidu/ernie-4.5-vl-424b-a47b": 123000,
-		"baidu/ernie-4.5-300b-a47b-paddle": 123000,
-		"qwen/qwen3-30b-a3b-fp8": 40960,
-		"minimaxai/minimax-m1-80k": 1000000,
-		"deepseek/deepseek-r1-0528-qwen3-8b": 128000,
-		"qwen/qwen3-32b-fp8": 40960,
-		"qwen/qwen2.5-vl-72b-instruct": 32768,
-		"qwen/qwen3-235b-a22b-fp8": 40960,
-		"deepseek/deepseek-v3-turbo": 64000,
-		"thudm/glm-4.1v-9b-thinking": 65536,
-		"meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576,
-		"google/gemma-3-27b-it": 32000,
-		"deepseek/deepseek-r1-turbo": 64000,
-		"Sao10K/L3-8B-Stheno-v3.2": 8192,
-		"gryphe/mythomax-l2-13b": 4096,
-		"deepseek/deepseek-prover-v2-671b": 160000,
-		"meta-llama/llama-4-scout-17b-16e-instruct": 131072,
-		"deepseek/deepseek-r1-distill-llama-8b": 32000,
-		"meta-llama/llama-3.1-8b-instruct": 16384,
-		"deepseek/deepseek-r1-distill-qwen-14b": 64000,
-		"meta-llama/llama-3.3-70b-instruct": 131072,
-		"qwen/qwen-2.5-72b-instruct": 32000,
-		"mistralai/mistral-nemo": 60288,
-		"deepseek/deepseek-r1-distill-qwen-32b": 64000,
-		"meta-llama/llama-3-8b-instruct": 8192,
-		"microsoft/wizardlm-2-8x22b": 65535,
-		"deepseek/deepseek-r1-distill-llama-70b": 32000,
-		"mistralai/mistral-7b-instruct": 32768,
-		"meta-llama/llama-3-70b-instruct": 8192,
-		"nousresearch/hermes-2-pro-llama-3-8b": 8192,
-		"sao10k/l3-70b-euryale-v2.1": 8192,
-		"cognitivecomputations/dolphin-mixtral-8x22b": 16000,
-		"sophosympatheia/midnight-rose-70b": 4096,
-		"sao10k/l3-8b-lunaris": 8192,
-		"baidu/ernie-4.5-vl-28b-a3b": 30000,
-		"baidu/ernie-4.5-21B-a3b": 120000,
-		"baidu/ernie-4.5-0.3b": 120000,
-		"google/gemma-3-1b-it": 32768,
-		"qwen/qwen3-8b-fp8": 128000,
-		"qwen/qwen3-4b-fp8": 128000,
-		"thudm/glm-4-32b-0414": 32000,
-		"qwen/qwen2.5-7b-instruct": 32000,
-		"meta-llama/llama-3.2-1b-instruct": 131000,
-		"meta-llama/llama-3.2-3b-instruct": 32768,
-		"sao10k/l31-70b-euryale-v2.2": 8192
-	},
-	"fal": {
-		"fal/model-name": 4096
-	},
-	"cerebras": {
-		"cerebras/model-name": 8192
-	},
-	"hf-inference": {
-		"google/gemma-2-9b-it": 8192,
-		"meta-llama/Meta-Llama-3-8B-Instruct": 8192
-	},
-	"hyperbolic": {
-		"Qwen/Qwen2.5-72B-Instruct": 131072,
-		"Qwen/Qwen2.5-VL-72B-Instruct": 32768,
-		"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
-		"deepseek-ai/DeepSeek-V3": 131072,
-		"deepseek-ai/DeepSeek-V3-0324": 163840,
-		"meta-llama/Llama-3.3-70B-Instruct": 131072,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
-		"meta-llama/Llama-3.2-3B-Instruct": 131072,
-		"NousResearch/Hermes-3-Llama-3.1-70B": 12288,
-		"meta-llama/Meta-Llama-3.1-405B-Instruct": 131000,
-		"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
-		"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
-		"mistralai/Pixtral-12B-2409": 32768,
-		"Qwen/Qwen2.5-VL-7B-Instruct": 32768,
-		"meta-llama/Meta-Llama-3.1-405B-FP8": 32768,
-		"deepseek-ai/DeepSeek-R1": 163840,
-		"Qwen/QwQ-32B": 131072
-	},
-	"cohere": {
-		"embed-english-light-v3.0": 512,
-		"embed-multilingual-v2.0": 256,
-		"rerank-v3.5": 4096,
-		"embed-v4.0": 8192,
-		"rerank-english-v3.0": 4096,
-		"command-r-08-2024": 132096,
-		"embed-english-light-v3.0-image": 0,
-		"embed-english-v3.0-image": 0,
-		"command-nightly": 288000,
-		"command-a-03-2025": 288000,
-		"command-r-plus-08-2024": 132096,
-		"c4ai-aya-vision-32b": 16384,
-		"command-r": 132096,
-		"command-r7b-12-2024": 132000,
-		"command-a-vision": 128000,
-		"command-r7b-arabic-02-2025": 128000,
-		"command-light-nightly": 4096,
-		"embed-english-v3.0": 512,
-		"embed-multilingual-light-v3.0-image": 0,
-		"embed-multilingual-v3.0-image": 0,
-		"c4ai-aya-expanse-32b": 128000
-	},
-	"together": {
-		"cartesia/sonic": 0,
-		"black-forest-labs/FLUX.1-kontext-pro": 0,
-		"Alibaba-NLP/gte-modernbert-base": 8192,
-		"mistralai/Mistral-7B-Instruct-v0.3": 32768,
-		"cartesia/sonic-2": 0,
-		"togethercomputer/MoA-1": 32768,
-		"meta-llama/Meta-Llama-Guard-3-8B": 8192,
-		"togethercomputer/m2-bert-80M-32k-retrieval": 32768,
-		"deepseek-ai/DeepSeek-V3": 131072,
-		"moonshotai/Kimi-K2-Instruct": 131072,
-		"Qwen/Qwen2.5-7B-Instruct-Turbo": 32768,
-		"meta-llama/Llama-3-8b-chat-hf": 8192,
-		"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815,
-		"togethercomputer/MoA-1-Turbo": 32768,
-		"eddiehou/meta-llama/Llama-3.1-405B": 12000,
-		"mistralai/Mistral-7B-Instruct-v0.2": 32768,
-		"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072,
-		"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072,
-		"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072,
-		"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072,
-		"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072,
-		"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192,
-		"meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
-		"deepseek-ai/DeepSeek-R1": 163840,
-		"Qwen/Qwen2.5-VL-72B-Instruct": 32768,
-		"google/gemma-3n-E4B-it": 32768,
-		"arcee-ai/AFM-4.5B-Preview": 65536,
-		"lgai/exaone-3-5-32b-instruct": 32768,
-		"meta-llama/Llama-3-70b-chat-hf": 8192,
-		"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
-		"google/gemma-2-27b-it": 8192,
-		"Qwen/Qwen2-72B-Instruct": 32768,
-		"meta-llama/Llama-2-70b-hf": 4096,
-		"Qwen/Qwen3-235B-A22B-fp8-tput": 40960,
-		"Salesforce/Llama-Rank-V1": 8192,
-		"mistralai/Mistral-Small-24B-Instruct-2501": 32768,
-		"Qwen/Qwen2-VL-72B-Instruct": 32768,
-		"mixedbread-ai/Mxbai-Rerank-Large-V2": 32768,
-		"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072,
-		"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768,
-		"meta-llama/Llama-Vision-Free": 131072,
-		"perplexity-ai/r1-1776": 163840,
-		"scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192,
-		"meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
-		"arcee-ai/maestro-reasoning": 131072,
-		"togethercomputer/Refuel-Llm-V2-Small": 8192,
-		"meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 16384,
-		"arcee-ai/coder-large": 32768,
-		"Qwen/QwQ-32B": 131072,
-		"arcee-ai/virtuoso-large": 131072,
-		"arcee_ai/arcee-spotlight": 131072,
-		"arcee-ai/arcee-blitz": 32768,
-		"deepseek-ai/DeepSeek-R1-0528-tput": 163840,
-		"arcee-ai/virtuoso-medium-v2": 131072,
-		"arcee-ai/caller": 32768,
-		"marin-community/marin-8b-instruct": 4096,
-		"lgai/exaone-deep-32b": 32768,
-		"google/gemma-3-27b-it": 65536,
-		"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
-		"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
-		"mistralai/Mistral-7B-Instruct-v0.1": 32768,
-		"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576,
-		"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072,
-		"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
-		"scb10x/scb10x-typhoon-2-1-gemma3-12b": 131072,
-		"meta-llama/Llama-Guard-4-12B": 1048576,
-		"togethercomputer/Refuel-Llm-V2": 16384,
-		"Qwen/Qwen2.5-72B-Instruct-Turbo": 131072,
-		"meta-llama/LlamaGuard-2-8b": 8192,
-		"meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
-		"intfloat/multilingual-e5-large-instruct": 514,
-		"meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576,
-		"yan/deepseek-ai-deepseek-v3": 163839,
-		"black-forest-labs/FLUX.1-kontext-max": 0
-	},
-	"fireworks-ai": {
-		"accounts/fireworks/models/deepseek-r1-0528": 163840,
-		"accounts/perplexity/models/r1-1776": 163840,
-		"accounts/fireworks/models/qwen3-30b-a3b": 131072,
-		"accounts/fireworks/models/llama4-scout-instruct-basic": 10485760,
-		"accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576,
-		"accounts/fireworks/models/llama-v3p1-8b-instruct": 131072,
-		"accounts/fireworks/models/firesearch-ocr-v6": 8192,
-		"accounts/fireworks/models/llama-v3p1-405b-instruct": 131072,
-		"accounts/fireworks/models/mixtral-8x22b-instruct": 65536,
-		"accounts/fireworks/models/deepseek-r1-basic": 163840,
-		"accounts/fireworks/models/kimi-k2-instruct": 131072,
-		"accounts/fireworks/models/llama-v3p1-70b-instruct": 131072,
-		"accounts/fireworks/models/qwen3-235b-a22b": 131072,
-		"accounts/fireworks/models/llama-v3p3-70b-instruct": 131072,
-		"accounts/fireworks/models/deepseek-r1": 163840,
-		"accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072,
-		"accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b": 131072,
-		"accounts/fireworks/models/deepseek-v3": 131072,
-		"accounts/fireworks/models/deepseek-v3-0324": 163840,
-		"accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000
-	}
-}
src/lib/server/providers/cohere.ts DELETED
@@ -1,35 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const COHERE_API_URL = "https://api.cohere.ai/v1/models";
-
-// Accept apiKey as an argument
-export async function fetchCohereData(apiKey: string | undefined): Promise<MaxTokensCache["cohere"]> {
-	if (!apiKey) {
-		console.warn("Cohere API key not provided. Skipping Cohere fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(COHERE_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`, // Use passed-in apiKey
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Cohere API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any = await response.json();
-		const modelsData: MaxTokensCache["cohere"] = {};
-		if (data?.models && Array.isArray(data.models)) {
-			for (const model of data.models) {
-				if (model.name && typeof model.context_length === "number") {
-					modelsData[model.name] = model.context_length;
-				}
-			}
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Cohere data:", error);
-		return {};
-	}
-}
src/lib/server/providers/fireworks.ts DELETED
@@ -1,41 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const FIREWORKS_API_URL = "https://api.fireworks.ai/inference/v1/models"; // Assumed
-
-export async function fetchFireworksData(apiKey: string | undefined): Promise<MaxTokensCache["fireworks-ai"]> {
-	if (!apiKey) {
-		console.warn("Fireworks AI API key not provided. Skipping Fireworks AI fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(FIREWORKS_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Fireworks AI API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any = await response.json(); // Assuming OpenAI structure { data: [ { id: string, ... } ] }
-		const modelsData: MaxTokensCache["fireworks-ai"] = {};
-
-		// Check if data and data.data exist and are an array
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				// Check for common context length fields (OpenAI uses context_window)
-				const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
-				// Fireworks uses model.id
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Fireworks AI API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Fireworks AI data:", error);
-		return {}; // Return empty on error
-	}
-}
src/lib/server/providers/hyperbolic.ts DELETED
@@ -1,41 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/models"; // Assumed
-
-export async function fetchHyperbolicData(apiKey: string | undefined): Promise<MaxTokensCache["hyperbolic"]> {
-	if (!apiKey) {
-		console.warn("Hyperbolic API key not provided. Skipping Hyperbolic fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(HYPERBOLIC_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Hyperbolic API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any = await response.json(); // Assuming OpenAI structure { data: [ { id: string, ... } ] }
-		const modelsData: MaxTokensCache["hyperbolic"] = {};
-
-		// Check if data and data.data exist and are an array
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				// Check for common context length fields (OpenAI uses context_window)
-				const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
-				// Assuming Hyperbolic uses model.id
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Hyperbolic API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Hyperbolic data:", error);
-		return {}; // Return empty on error
-	}
-}
src/lib/server/providers/index.ts DELETED
@@ -1,224 +0,0 @@
-import fs from "fs/promises";
-import path from "path";
-import { fetchCohereData } from "./cohere.js";
-import { fetchTogetherData } from "./together.js";
-import { fetchFireworksData } from "./fireworks.js";
-import { fetchHyperbolicData } from "./hyperbolic.js";
-import { fetchReplicateData } from "./replicate.js";
-import { fetchNebiusData } from "./nebius.js";
-import { fetchNovitaData } from "./novita.js";
-import { fetchSambanovaData } from "./sambanova.js";
-
-// --- Constants ---
-const CACHE_FILE_PATH = path.resolve("src/lib/server/data/context_length.json");
-
-// --- Types ---
-export interface MaxTokensCache {
-	[provider: string]: {
-		[modelId: string]: number;
-	};
-}
-
-// Type for API keys object passed to fetchAllProviderData
-export interface ApiKeys {
-	COHERE_API_KEY?: string;
-	TOGETHER_API_KEY?: string;
-	FIREWORKS_API_KEY?: string;
-	HYPERBOLIC_API_KEY?: string;
-	REPLICATE_API_KEY?: string;
-	NEBIUS_API_KEY?: string;
-	NOVITA_API_KEY?: string;
-	SAMBANOVA_API_KEY?: string;
-}
-
-// --- Cache Handling ---
-// (readCache and updateCache remain the same)
-let memoryCache: MaxTokensCache | null = null;
-let cacheReadPromise: Promise<MaxTokensCache> | null = null;
-
-async function readCache(): Promise<MaxTokensCache> {
-	if (memoryCache) {
-		return memoryCache;
-	}
-	if (cacheReadPromise) {
-		return cacheReadPromise;
-	}
-	cacheReadPromise = (async () => {
-		try {
-			const data = await fs.readFile(CACHE_FILE_PATH, "utf-8");
-			memoryCache = JSON.parse(data) as MaxTokensCache;
-			return memoryCache!;
-		} catch (error: unknown) {
-			if (typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT") {
-				console.warn(`Cache file not found at ${CACHE_FILE_PATH}, starting with empty cache.`);
-				memoryCache = {};
-				return {};
-			}
-			console.error("Error reading context length cache file:", error);
-			memoryCache = {};
-			return {};
-		} finally {
-			cacheReadPromise = null;
-		}
-	})();
-	return cacheReadPromise;
-}
-
-const isBrowser = typeof window !== "undefined";
-
-function serverLog(...txt: unknown[]) {
-	if (isBrowser) return;
-	console.log(...txt);
-}
-
-function serverError(...txt: unknown[]) {
-	if (isBrowser) return;
-	console.error(...txt);
-}
-
-async function updateCache(provider: string, modelId: string, maxTokens: number): Promise<void> {
-	try {
-		let cache: MaxTokensCache;
-		try {
-			const data = await fs.readFile(CACHE_FILE_PATH, "utf-8");
-			cache = JSON.parse(data) as MaxTokensCache;
-		} catch (readError: unknown) {
-			if (typeof readError === "object" && readError !== null && "code" in readError && readError.code === "ENOENT") {
-				cache = {};
-			} else {
-				throw readError;
-			}
-		}
-		if (!cache[provider]) {
-			cache[provider] = {};
-		}
-		cache[provider][modelId] = maxTokens;
-		const tempFilePath = CACHE_FILE_PATH + ".tmp";
-		await fs.writeFile(tempFilePath, JSON.stringify(cache, null, "\t"), "utf-8");
-		await fs.rename(tempFilePath, CACHE_FILE_PATH);
-		memoryCache = cache;
-		serverLog(`Cache updated for ${provider} - ${modelId}: ${maxTokens}`);
-	} catch (error) {
-		serverError(`Error updating context length cache for ${provider} - ${modelId}:`, error);
-		memoryCache = null;
-	}
-}
-
-// --- Main Exported Function ---
-// Now accepts apiKey as the third argument
-export async function getMaxTokens(
-	provider: string,
-	modelId: string,
-	apiKey: string | undefined,
-): Promise<number | null> {
-	const cache = await readCache();
-	const cachedValue = cache[provider]?.[modelId];
-
-	if (cachedValue !== undefined) {
-		return cachedValue;
-	}
-
-	serverLog(`Cache miss for ${provider} - ${modelId}. Attempting live fetch...`);
-
-	let liveData: number | null = null;
-	let fetchedProviderData: MaxTokensCache[string] | null = null;
-
-	try {
-		// Pass the received apiKey to the fetcher functions
-		switch (provider) {
-			case "cohere":
-				fetchedProviderData = await fetchCohereData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "together":
-				fetchedProviderData = await fetchTogetherData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "fireworks-ai":
-				fetchedProviderData = await fetchFireworksData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "hyperbolic":
-				fetchedProviderData = await fetchHyperbolicData(apiKey); // Pass apiKey
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "replicate":
-				fetchedProviderData = await fetchReplicateData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "nebius":
-				fetchedProviderData = await fetchNebiusData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "novita":
-				fetchedProviderData = await fetchNovitaData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			case "sambanova":
-				fetchedProviderData = await fetchSambanovaData(apiKey);
-				liveData = fetchedProviderData?.[modelId] ?? null;
-				break;
-			default:
-				serverLog(`Live fetch not supported or implemented for provider: ${provider}`);
-				return null;
-		}
-
-		if (liveData !== null) {
-			serverLog(`Live fetch successful for ${provider} - ${modelId}: ${liveData}`);
-			updateCache(provider, modelId, liveData).catch(err => {
-				serverError(`Async cache update failed for ${provider} - ${modelId}:`, err);
-			});
-			return liveData;
-		} else {
-			serverLog(`Live fetch for ${provider} did not return data for model ${modelId}.`);
-			return null;
-		}
-	} catch (error) {
-		serverError(`Error during live fetch for ${provider} - ${modelId}:`, error);
-		return null;
-	}
-}
-
-// --- Helper for Build Script ---
-// Now accepts an apiKeys object
-export async function fetchAllProviderData(apiKeys: ApiKeys): Promise<MaxTokensCache> {
-	serverLog("Fetching data for all providers...");
-	const results: MaxTokensCache = {};
-
-	// Define fetchers, passing the specific key from the apiKeys object
-	const providerFetchers = [
-		{ name: "cohere", fetcher: () => fetchCohereData(apiKeys.COHERE_API_KEY) },
-		{ name: "together", fetcher: () => fetchTogetherData(apiKeys.TOGETHER_API_KEY) },
-		{ name: "fireworks-ai", fetcher: () => fetchFireworksData(apiKeys.FIREWORKS_API_KEY) },
-		{ name: "hyperbolic", fetcher: () => fetchHyperbolicData(apiKeys.HYPERBOLIC_API_KEY) },
-		{ name: "replicate", fetcher: () => fetchReplicateData(apiKeys.REPLICATE_API_KEY) },
-		{ name: "nebius", fetcher: () => fetchNebiusData(apiKeys.NEBIUS_API_KEY) },
-		{ name: "novita", fetcher: () => fetchNovitaData(apiKeys.NOVITA_API_KEY) },
-		{ name: "sambanova", fetcher: () => fetchSambanovaData(apiKeys.SAMBANOVA_API_KEY) },
-	];
-
-	const settledResults = await Promise.allSettled(providerFetchers.map(p => p.fetcher()));
-
-	settledResults.forEach((result, index) => {
-		const providerInfo = providerFetchers[index];
-		if (!providerInfo) {
-			serverError(`Error: No provider info found for index ${index}`);
-			return;
-		}
-		const providerName = providerInfo.name;
-
-		if (result.status === "fulfilled" && result.value) {
-			if (Object.keys(result.value).length > 0) {
-				results[providerName] = result.value;
-				serverLog(`Successfully fetched data for ${providerName}`);
-			} else {
-				serverLog(`No data returned for ${providerName}.`);
-			}
-		} else if (result.status === "rejected") {
-			serverError(`Error fetching ${providerName} data:`, result.reason);
-		}
-	});
-
-	serverLog("Finished fetching provider data.");
-	return results;
-}
src/lib/server/providers/nebius.ts DELETED
@@ -1,49 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-interface NebiusModel {
-	id: string;
-	config?: {
-		max_tokens?: number;
-	};
-	context_length?: number;
-}
-
-interface NebiusResponse {
-	data?: NebiusModel[];
-}
-
-const NEBIUS_API_URL = "https://api.studio.nebius.com/v1/models?verbose=true";
-
-export async function fetchNebiusData(apiKey: string | undefined): Promise<MaxTokensCache["nebius"]> {
-	if (!apiKey) {
-		console.warn("Nebius API key not provided. Skipping Nebius fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(NEBIUS_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Nebius API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data: NebiusResponse = await response.json();
-		const modelsData: MaxTokensCache["nebius"] = {};
-
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Nebius API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Nebius data:", error);
-		return {};
-	}
-}
src/lib/server/providers/novita.ts DELETED
@@ -1,46 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const NOVITA_API_URL = "https://api.novita.ai/v3/openai/models";
-
-interface NovitaModel {
-	id: string;
-	object: string;
-	context_size: number;
-}
-
-interface NovitaResponse {
-	data: NovitaModel[];
-}
-
-export async function fetchNovitaData(apiKey: string | undefined): Promise<MaxTokensCache["novita"]> {
-	if (!apiKey) {
-		console.warn("Novita API key not provided. Skipping Novita fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(NOVITA_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Novita API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data: NovitaResponse = await response.json();
-		const modelsData: MaxTokensCache["novita"] = {};
-
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				if (model.id && typeof model.context_size === "number") {
-					modelsData[model.id] = model.context_size;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Novita API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Novita data:", error);
-		return {};
-	}
-}
src/lib/server/providers/replicate.ts DELETED
@@ -1,37 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const REPLICATE_API_URL = "https://api.replicate.com/v1/models";
-
-export async function fetchReplicateData(apiKey: string | undefined): Promise<MaxTokensCache["replicate"]> {
-	if (!apiKey) {
-		console.warn("Replicate API key not provided. Skipping Replicate fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(REPLICATE_API_URL, {
-			headers: {
-				Authorization: `Token ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Replicate API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data = await response.json();
-		const modelsData: MaxTokensCache["replicate"] = {};
-
-		if (data?.results && Array.isArray(data.results)) {
-			for (const model of data.results) {
-				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from Replicate API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Replicate data:", error);
-		return {};
-	}
-}
src/lib/server/providers/sambanova.ts DELETED
@@ -1,52 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const SAMBANOVA_API_URL = "https://api.sambanova.ai/v1/models";
-
-interface SambanovaModel {
-	id: string;
-	object: string;
-	context_length: number;
-	max_completion_tokens?: number;
-	pricing?: {
-		prompt: string;
-		completion: string;
-	};
-}
-
-interface SambanovaResponse {
-	data: SambanovaModel[];
-	object: string;
-}
-
-export async function fetchSambanovaData(apiKey: string | undefined): Promise<MaxTokensCache["sambanova"]> {
-	if (!apiKey) {
-		console.warn("SambaNova API key not provided. Skipping SambaNova fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(SAMBANOVA_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`,
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`SambaNova API request failed: ${response.status} ${response.statusText}`);
-		}
-		const data: SambanovaResponse = await response.json();
-		const modelsData: MaxTokensCache["sambanova"] = {};
-
-		if (data?.data && Array.isArray(data.data)) {
-			for (const model of data.data) {
-				if (model.id && typeof model.context_length === "number") {
-					modelsData[model.id] = model.context_length;
-				}
-			}
-		} else {
-			console.warn("Unexpected response structure from SambaNova API:", data);
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching SambaNova data:", error);
-		return {};
-	}
-}
src/lib/server/providers/together.ts DELETED
@@ -1,37 +0,0 @@
-import type { MaxTokensCache } from "./index.js";
-
-const TOGETHER_API_URL = "https://api.together.xyz/v1/models";
-
-// Accept apiKey as an argument
-export async function fetchTogetherData(apiKey: string | undefined): Promise<MaxTokensCache["together"]> {
-	if (!apiKey) {
-		console.warn("Together AI API key not provided. Skipping Together AI fetch.");
-		return {};
-	}
-	try {
-		const response = await fetch(TOGETHER_API_URL, {
-			headers: {
-				Authorization: `Bearer ${apiKey}`, // Use passed-in apiKey
-			},
-		});
-		if (!response.ok) {
-			throw new Error(`Together AI API request failed: ${response.status} ${response.statusText}`);
-		}
-		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const data: any[] = await response.json();
-		const modelsData: MaxTokensCache["together"] = {};
-
-		if (Array.isArray(data)) {
-			for (const model of data) {
-				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
-				if (model.id && typeof contextLength === "number") {
-					modelsData[model.id] = contextLength;
-				}
-			}
-		}
-		return modelsData;
-	} catch (error) {
-		console.error("Error fetching Together AI data:", error);
-		return {};
-	}
-}
src/lib/state/conversations.svelte.ts CHANGED
@@ -5,9 +5,10 @@ import {
 import { addToast } from "$lib/components/toaster.svelte.js";
 import { AbortManager } from "$lib/spells/abort-manager.svelte";
 import { PipelineTag, Provider, type ConversationMessage, type GenerationStatistics, type Model } from "$lib/types.js";
-import { handleNonStreamingResponse, handleStreamingResponse } from "$lib/utils/business.svelte.js";
+import { handleNonStreamingResponse, handleStreamingResponse, estimateTokens } from "$lib/utils/business.svelte.js";
 import { omit, snapshot } from "$lib/utils/object.svelte";
 import { models, structuredForbiddenProviders } from "./models.svelte";
+import { pricing } from "./pricing.svelte.js";
 import { DEFAULT_PROJECT_ID, ProjectEntity, projects } from "./projects.svelte";
 import { token } from "./token.svelte";
 // eslint-disable-next-line @typescript-eslint/ban-ts-comment
@@ -87,7 +88,7 @@ export class ConversationClass {
 	readonly model = $derived(models.all.find(m => m.id === this.data.modelId) ?? emptyModel);
 
 	abortManager = new AbortManager();
-	generationStats = $state({ latency: 0, tokens: 0 }) as GenerationStatistics;
+	generationStats = $state({ latency: 0, tokens: 0, cost: 0 }) as GenerationStatistics;
 	generating = $state(false);
 
 	constructor(data: ConversationEntityMembers) {
@@ -232,6 +233,17 @@ export class ConversationClass {
 
 		const endTime = performance.now();
 		this.generationStats.latency = Math.round(endTime - startTime);
+
+		// Calculate cost if we have pricing data
+		if (this.data.provider && this.data.provider !== "auto") {
+			const inputTokens = estimateTokens(this);
+			const outputTokens = this.generationStats.tokens;
+			const costEstimate = pricing.estimateCost(this.model.id, this.data.provider, inputTokens, outputTokens);
+			if (costEstimate) {
+				this.generationStats.cost = costEstimate.total;
+			}
+		}
+
 		this.generating = false;
 	};
src/lib/state/pricing.svelte.ts ADDED
@@ -0,0 +1,78 @@
+import { page } from "$app/state";
+import { atLeastNDecimals } from "$lib/utils/number.js";
+import type { PageData } from "../../routes/$types.js";
+
+interface RouterProvider {
+	provider: string;
+	status: string;
+	context_length?: number;
+	pricing?: {
+		input: number;
+		output: number;
+	};
+	supports_tools?: boolean;
+	supports_structured_output?: boolean;
+}
+
+interface RouterModel {
+	id: string;
+	providers: RouterProvider[];
+}
+
+interface RouterData {
+	data: RouterModel[];
+}
+
+const pageData = $derived(page.data as PageData & { routerData: RouterData });
+
+class Pricing {
+	routerData = $derived(pageData.routerData as RouterData);
+
+	getPricing(modelId: string, provider: string) {
+		const model = this.routerData?.data?.find((m: RouterModel) => m.id === modelId);
+		if (!model) return null;
+
+		const providerData = model.providers.find((p: RouterProvider) => p.provider === provider);
+		return providerData?.pricing || null;
+	}
+
+	getContextLength(modelId: string, provider: string) {
+		const model = this.routerData?.data?.find((m: RouterModel) => m.id === modelId);
+		if (!model) return null;
+
+		const providerData = model.providers.find((p: RouterProvider) => p.provider === provider);
+		return providerData?.context_length || null;
+	}
+
+	formatPricing(pricing: { input: number; output: number } | null) {
+		if (!pricing) return null;
+
+		const inputCost = atLeastNDecimals(pricing.input, 2);
+		const outputCost = atLeastNDecimals(pricing.output, 2);
+
+		return {
+			input: `$${inputCost}/1M`,
+			output: `$${outputCost}/1M`,
+			inputRaw: pricing.input,
+			outputRaw: pricing.output,
+		};
+	}
+
+	estimateCost(modelId: string, provider: string, inputTokens: number, outputTokens: number = 0) {
+		const pricing = this.getPricing(modelId, provider);
+		if (!pricing) return null;
+
+		const inputCost = (inputTokens / 1000000) * pricing.input;
+		const outputCost = (outputTokens / 1000000) * pricing.output;
+		const totalCost = inputCost + outputCost;
+
+		return {
+			input: inputCost,
+			output: outputCost,
+			total: totalCost,
+			formatted: `$${totalCost.toFixed(6)}`,
+		};
+	}
+}
+
+export const pricing = new Pricing();
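
A usage sketch for the class above (model id, provider, and prices here are hypothetical; the real values come from page.data.routerData):

// Formatting: atLeastNDecimals pads to at least two decimals.
const formatted = pricing.formatPricing({ input: 0.8, output: 2.4 });
// → { input: "$0.80/1M", output: "$2.40/1M", inputRaw: 0.8, outputRaw: 2.4 }

// Cost estimate, if the model/provider pair exists in routerData:
const est = pricing.estimateCost("org/some-model", "some-provider", 1200, 300);
// with the prices above: (1200 / 1e6) * 0.8 + (300 / 1e6) * 2.4 = 0.00168 → "$0.001680"

// Context length lookup used by maxAllowedTokens:
const ctx = pricing.getContextLength("org/some-model", "some-provider"); // number | null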
src/lib/types.ts CHANGED
@@ -197,6 +197,7 @@ export type ValueOf<T> = T[keyof T];
 export interface GenerationStatistics {
 	latency: number;
 	tokens: number;
+	cost?: number;
 }
 
 export type ModelsJson = {
src/lib/utils/business.svelte.ts CHANGED
@@ -6,7 +6,7 @@
  *
  **/
 
-import ctxLengthData from "$lib/data/context_length.json";
+import { pricing } from "$lib/state/pricing.svelte.js";
 import { InferenceClient, snippets } from "@huggingface/inference";
 import { ConversationClass, type ConversationEntityMembers } from "$lib/state/conversations.svelte";
 import { token } from "$lib/state/token.svelte";
@@ -21,7 +21,7 @@ import {
 	type Model,
 } from "$lib/types.js";
 import { safeParse } from "$lib/utils/json.js";
-import { omit, tryGet } from "$lib/utils/object.svelte.js";
+import { omit } from "$lib/utils/object.svelte.js";
 import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
 import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
 import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
@@ -71,20 +71,15 @@ type OpenAICompletionMetadata = {
 type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
 
 export function maxAllowedTokens(conversation: ConversationClass) {
-	const ctxLength = (() => {
-		const model = conversation.model;
-		const { provider } = conversation.data;
-
-		if (!provider || !isHFModel(model)) return;
-
-		const idOnProvider = model.inferenceProviderMapping.find(data => data.provider === provider)?.providerId;
-		if (!idOnProvider) return;
-
-		const models = tryGet(ctxLengthData, provider);
-		if (!models) return;
-
-		return tryGet(models, idOnProvider) as number | undefined;
-	})();
+	const model = conversation.model;
+	const { provider } = conversation.data;
+
+	if (!provider || !isHFModel(model)) {
+		return customMaxTokens[conversation.model.id] ?? 100000;
+	}
+
+	// Try to get context length from router data
+	const ctxLength = pricing.getContextLength(model.id, provider);
 
 	if (!ctxLength) return customMaxTokens[conversation.model.id] ?? 100000;
 	return ctxLength;
@@ -387,15 +382,16 @@ export async function getTokenizer(model: Model) {
 }
 
 // When you don't have access to a tokenizer, guesstimate
-export function estimateTokens(conversation: Conversation) {
-	const content = conversation.messages.reduce((acc, curr) => {
+export function estimateTokens(conversation: ConversationClass) {
+	if (!conversation.data.messages) return 0;
+	const content = conversation.data.messages?.reduce((acc, curr) => {
 		return acc + (curr?.content ?? "");
 	}, "");
 
 	return content.length / 4; // 1 token ~ 4 characters
 }
 
-export async function getTokens(conversation: Conversation): Promise<number> {
+export async function getTokens(conversation: ConversationClass): Promise<number> {
 	const model = conversation.model;
 	if (isCustomModel(model)) return estimateTokens(conversation);
 	const tokenizer = await getTokenizer(model);
@@ -404,7 +400,7 @@ export async function getTokens(conversation: Conversation): Promise<number> {
 	// This is a simplified version - you might need to adjust based on your exact needs
 	let formattedText = "";
 
-	conversation.messages.forEach((message, index) => {
+	conversation.data.messages?.forEach((message, index) => {
 		let content = `<|start_header_id|>${message.role}<|end_header_id|>\n\n${message.content?.trim()}<|eot_id|>`;
 
 		// Add BOS token to the first message
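
The net effect on maxAllowedTokens is a two-step fallback; a short sketch (the context length value is illustrative):

const limit = maxAllowedTokens(conversation);
// provider set and router data has a context_length (e.g. 131072) → that value;
// otherwise → customMaxTokens[conversation.model.id] ?? 100000.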
src/lib/utils/number.ts ADDED
@@ -0,0 +1,9 @@
+export function atLeastNDecimals(num: number, minDecimals: number): string {
+	return num.toFixed(Math.max(minDecimals, getDecimalPlaces(num)));
+}
+
+function getDecimalPlaces(num: number): number {
+	const str = num.toString();
+	const decimalIndex = str.indexOf(".");
+	return decimalIndex === -1 ? 0 : str.length - decimalIndex - 1;
+}
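
Behavior sketch for the helper above (outputs checked against the implementation):

atLeastNDecimals(3, 2); // "3.00" — integers get padded to minDecimals
atLeastNDecimals(0.5, 2); // "0.50" — short fractions get padded
atLeastNDecimals(0.0875, 2); // "0.0875" — longer fractions keep all their decimals
// Caveat: inputs that stringify in exponential notation (e.g. 1e-7) report
// 0 decimal places, so they collapse to "0.00" here.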
src/routes/+page.ts CHANGED
@@ -2,7 +2,16 @@ import type { PageLoad } from "./$types.js";
 import type { ApiModelsResponse } from "./api/models/+server.js";
 
 export const load: PageLoad = async ({ fetch }) => {
-	const res = await fetch("/api/models");
-	const json: ApiModelsResponse = await res.json();
-	return json;
+	const [modelsRes, routerRes] = await Promise.all([
+		fetch("/api/models"),
+		fetch("https://router.huggingface.co/v1/models"),
+	]);
+
+	const models: ApiModelsResponse = await modelsRes.json();
+	const routerData = await routerRes.json();
+
+	return {
+		...models,
+		routerData,
+	};
 };
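
For context, a minimal sketch of the router payload shape this load function forwards, as implied by the RouterProvider/RouterModel interfaces in pricing.svelte.ts (all field values are illustrative, not actual API output):

// GET https://router.huggingface.co/v1/models → { data: RouterModel[] }
const routerDataExample = {
	data: [
		{
			id: "org/some-model", // hypothetical model id
			providers: [
				{
					provider: "some-provider",
					status: "live", // illustrative status value
					context_length: 131072,
					pricing: { input: 0.8, output: 2.4 }, // USD per 1M tokens
				},
			],
		},
	],
};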