Haay's picture
Upload 926 files
519a20c verified
import { Buffer } from 'node:buffer';
import fetch from 'node-fetch';
import express from 'express';
import { speak, languages } from 'google-translate-api-x';
import crypto from 'node:crypto';
import { readSecret, SECRET_KEYS } from './secrets.js';
import { GEMINI_SAFETY } from '../constants.js';
import { getConfigValue, trimTrailingSlash } from '../util.js';
// Default base URL for Google AI Studio requests; used when the request does not supply a reverse proxy.
const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
// Fallback base URL for the Vertex AI proxy branch when no reverse proxy URL is present in the request.
const API_VERTEX_AI = 'https://us-central1-aiplatform.googleapis.com';
/**
 * Builds the 44-byte RIFF/WAVE header that precedes uncompressed PCM sample data.
 * @param {number} dataSize Size of the PCM payload in bytes
 * @param {number} sampleRate Samples per second
 * @param {number} [numChannels=1] Number of audio channels
 * @param {number} [bitsPerSample=16] Bits per sample
 * @returns {Buffer} A 44-byte buffer containing the header
 */
function createWavHeader(dataSize, sampleRate, numChannels = 1, bitsPerSample = 16) {
    const HEADER_SIZE = 44;
    const bytesPerFrame = numChannels * bitsPerSample / 8;
    const bytesPerSecond = sampleRate * numChannels * bitsPerSample / 8;

    // Each entry is [Buffer method, value, byte offset]; entries are applied in file order.
    const layout = [
        ['write', 'RIFF', 0],
        ['writeUInt32LE', 36 + dataSize, 4], // remaining file size after this field
        ['write', 'WAVE', 8],
        ['write', 'fmt ', 12],
        ['writeUInt32LE', 16, 16], // length of the fmt sub-chunk
        ['writeUInt16LE', 1, 20], // audio format tag 1 (PCM)
        ['writeUInt16LE', numChannels, 22],
        ['writeUInt32LE', sampleRate, 24],
        ['writeUInt32LE', bytesPerSecond, 28],
        ['writeUInt16LE', bytesPerFrame, 32],
        ['writeUInt16LE', bitsPerSample, 34],
        ['write', 'data', 36],
        ['writeUInt32LE', dataSize, 40],
    ];

    const header = Buffer.alloc(HEADER_SIZE);
    for (const [method, value, offset] of layout) {
        header[method](value, offset);
    }
    return header;
}
/**
 * Produces a WAV file by placing a generated header in front of raw PCM data.
 * @param {Buffer} pcmData Raw PCM bytes; its length is written into the header
 * @param {number} sampleRate Samples per second, forwarded to the header builder
 * @returns {Buffer} Header followed by the PCM data
 */
function createCompleteWavFile(pcmData, sampleRate) {
    const wavHeader = createWavHeader(pcmData.length, sampleRate);
    const chunks = [wavHeader, pcmData];
    return Buffer.concat(chunks);
}
/**
 * Resolves the Authorization header value to use for a Vertex AI request.
 *
 * Resolution order:
 *  1. If the request body names a reverse proxy, the proxy password is used.
 *  2. 'express' mode (the default) reads the stored Vertex AI API key.
 *  3. 'full' mode reads the stored service account JSON, signs a JWT with it
 *     and exchanges that JWT for an access token.
 *
 * @param {object} request Request object; reads `body.vertexai_auth_mode`, `body.reverse_proxy`,
 * `body.proxy_password` and `user.directories`
 * @returns {Promise<{authHeader: string, authType: string}>} Header value and the mode that produced it
 * @throws {Error} If the required secret is missing, service account authentication fails,
 * or the mode is not recognized
 */
export async function getVertexAIAuth(request) {
    const { body } = request;
    const authMode = body.vertexai_auth_mode || 'express';

    // A configured reverse proxy takes precedence over every auth mode.
    if (body.reverse_proxy) {
        return { authHeader: `Bearer ${body.proxy_password}`, authType: 'proxy' };
    }

    switch (authMode) {
        case 'express': {
            const apiKey = readSecret(request.user.directories, SECRET_KEYS.VERTEXAI);
            if (!apiKey) {
                throw new Error('API key is required for Vertex AI Express mode');
            }
            return { authHeader: `Bearer ${apiKey}`, authType: 'express' };
        }
        case 'full': {
            // The service account JSON lives in backend secret storage.
            const serviceAccountJson = readSecret(request.user.directories, SECRET_KEYS.VERTEXAI_SERVICE_ACCOUNT);
            if (!serviceAccountJson) {
                throw new Error('Service Account JSON is required for Vertex AI Full mode');
            }
            try {
                const serviceAccount = JSON.parse(serviceAccountJson);
                const jwtToken = await generateJWTToken(serviceAccount);
                const accessToken = await getAccessToken(jwtToken);
                return { authHeader: `Bearer ${accessToken}`, authType: 'full' };
            } catch (error) {
                console.error('Failed to authenticate with service account:', error);
                throw new Error(`Service account authentication failed: ${error.message}`);
            }
        }
        default:
            throw new Error(`Unsupported Vertex AI authentication mode: ${authMode}`);
    }
}
/**
 * Builds and signs an RS256 JWT from service account credentials.
 * The token is issued by `client_email`, is addressed to Google's OAuth token endpoint,
 * requests the cloud-platform scope and expires one hour after issuance.
 * @param {object} serviceAccount Service account JSON object (reads `client_email` and `private_key`)
 * @returns {Promise<string>} JWT token in `header.payload.signature` form
 */
export async function generateJWTToken(serviceAccount) {
    const ONE_HOUR_SECONDS = 3600;
    const issuedAt = Math.floor(Date.now() / 1000);

    // Serializes one JWT segment as base64url-encoded JSON.
    const encodeSegment = (segment) => Buffer.from(JSON.stringify(segment)).toString('base64url');

    const claims = {
        iss: serviceAccount.client_email,
        scope: 'https://www.googleapis.com/auth/cloud-platform',
        aud: 'https://oauth2.googleapis.com/token',
        iat: issuedAt,
        exp: issuedAt + ONE_HOUR_SECONDS,
    };
    const unsignedToken = [
        encodeSegment({ alg: 'RS256', typ: 'JWT' }),
        encodeSegment(claims),
    ].join('.');

    // Sign the "header.payload" string with the service account's private key.
    const signature = crypto
        .createSign('RSA-SHA256')
        .update(unsignedToken)
        .sign(serviceAccount.private_key, 'base64url');

    return `${unsignedToken}.${signature}`;
}
/**
 * Exchanges a signed JWT for an access token at Google's OAuth token endpoint
 * using the JWT bearer grant type.
 * @param {string} jwtToken Signed JWT assertion
 * @returns {Promise<string>} The `access_token` field of the token response
 * @throws {Error} If the token endpoint answers with a non-OK status
 */
export async function getAccessToken(jwtToken) {
    const form = new URLSearchParams();
    form.append('grant_type', 'urn:ietf:params:oauth:grant-type:jwt-bearer');
    form.append('assertion', jwtToken);

    const tokenResponse = await fetch('https://oauth2.googleapis.com/token', {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: form,
    });

    if (tokenResponse.ok) {
        /** @type {any} */
        const tokenData = await tokenResponse.json();
        return tokenData.access_token;
    }

    // Surface the raw response body so the caller can see why the exchange was refused.
    const details = await tokenResponse.text();
    throw new Error(`Failed to get access token: ${details}`);
}
/**
 * Reads the `project_id` field out of a parsed Service Account JSON object.
 * @param {object} serviceAccount Service account JSON object
 * @returns {string} Project ID
 * @throws {Error} If the argument is not an object, or it has no non-empty string `project_id`
 */
export function getProjectIdFromServiceAccount(serviceAccount) {
    const isObject = Boolean(serviceAccount) && typeof serviceAccount === 'object';
    if (!isObject) {
        throw new Error('Invalid service account object');
    }

    const { project_id: projectId } = serviceAccount;
    const isNonEmptyString = typeof projectId === 'string' && projectId !== '';
    if (isNonEmptyString) {
        return projectId;
    }

    throw new Error('Project ID not found in service account JSON');
}
/**
 * Generates Google API URL and headers based on request configuration.
 *
 * For Vertex AI (`request.body.api === 'vertexai'`) the URL shape depends on how the
 * request was authenticated:
 *  - express: API key passed as the `key` query parameter, optionally scoped to a project;
 *  - full: project-scoped URL with an Authorization header (project ID comes from the
 *    stored service account JSON);
 *  - anything else (proxy): reverse proxy base URL with an Authorization header.
 * For every other API value the Google AI Studio URL is built with the key as a query parameter.
 *
 * @param {express.Request} request Express request object
 * @param {string} model Model name to use
 * @param {string} endpoint API endpoint (default: 'generateContent')
 * @returns {Promise<{url: string, headers: object, apiName: string}>} URL, headers, and API name
 * @throws {Error} If Vertex AI authentication fails, or the service account JSON is missing
 * or does not yield a project ID in full mode
 */
export async function getGoogleApiConfig(request, model, endpoint = 'generateContent') {
    const useVertexAi = request.body.api === 'vertexai';
    const region = request.body.vertexai_region || 'us-central1';
    const apiName = useVertexAi ? 'Google Vertex AI' : 'Google AI Studio';
    const headers = {
        'Content-Type': 'application/json',
    };
    let url;

    if (useVertexAi) {
        // Get authentication for Vertex AI
        const { authHeader, authType } = await getVertexAIAuth(request);

        // The "global" location has no region prefix in its hostname; every other region
        // is addressed through its own "<region>-aiplatform" host.
        const regionalBaseUrl = region === 'global'
            ? 'https://aiplatform.googleapis.com'
            : `https://${region}-aiplatform.googleapis.com`;
        const modelPath = `publishers/google/models/${model}:${endpoint}`;

        if (authType === 'express') {
            // Express mode: use API key parameter
            const keyParam = authHeader.replace('Bearer ', '');
            const projectId = request.body.vertexai_express_project_id;
            // Both URL variants use the host that matches the requested location, so the
            // project-scoped form agrees with the full-mode URL built below.
            url = projectId
                ? `${regionalBaseUrl}/v1/projects/${projectId}/locations/${region}/${modelPath}?key=${keyParam}`
                : `${regionalBaseUrl}/v1/${modelPath}?key=${keyParam}`;
        } else if (authType === 'full') {
            // Full mode: use project-specific URL with Authorization header
            // Get project ID from Service Account JSON
            const serviceAccountJson = readSecret(request.user.directories, SECRET_KEYS.VERTEXAI_SERVICE_ACCOUNT);
            if (!serviceAccountJson) {
                throw new Error('Vertex AI Service Account JSON is missing.');
            }

            let projectId;
            try {
                const serviceAccount = JSON.parse(serviceAccountJson);
                projectId = getProjectIdFromServiceAccount(serviceAccount);
            } catch (error) {
                // Keep the underlying parse/validation error reachable for debugging.
                throw new Error('Failed to extract project ID from Service Account JSON.', { cause: error });
            }

            url = `${regionalBaseUrl}/v1/projects/${projectId}/locations/${region}/${modelPath}`;
            headers['Authorization'] = authHeader;
        } else {
            // Proxy mode: use Authorization header
            const apiUrl = trimTrailingSlash(request.body.reverse_proxy || API_VERTEX_AI);
            url = `${apiUrl}/v1/${modelPath}`;
            headers['Authorization'] = authHeader;
        }
    } else {
        // Google AI Studio
        const apiKey = request.body.reverse_proxy
            ? request.body.proxy_password
            : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);
        const apiUrl = trimTrailingSlash(request.body.reverse_proxy || API_MAKERSUITE);
        const apiVersion = getConfigValue('gemini.apiVersion', 'v1beta');
        url = `${apiUrl}/${apiVersion}/models/${model}:${endpoint}?key=${apiKey}`;
    }

    return { url, headers, apiName };
}
// Express router on which this file's Google endpoints are registered.
export const router = express.Router();
/**
 * POST /caption-image
 * Sends an image plus a text prompt to the configured Google API and returns the
 * generated caption as `{ caption }`.
 *
 * Reads from the request body:
 *  - `image`: parsed as a data URL of the form `data:<mime>;base64,<payload>`
 *  - `prompt`: text sent alongside the image
 *  - `model`: optional model name (defaults to 'gemini-2.0-flash')
 *
 * Responds with status 500 when the upstream call fails, when no candidates are
 * returned, or when the first candidate carries no caption text.
 */
router.post('/caption-image', async (request, response) => {
    try {
        const mimeType = request.body.image.split(';')[0].split(':')[1];
        const base64Data = request.body.image.split(',')[1];
        const model = request.body.model || 'gemini-2.0-flash';
        const { url, headers, apiName } = await getGoogleApiConfig(request, model);

        const body = {
            contents: [{
                role: 'user',
                parts: [
                    { text: request.body.prompt },
                    {
                        inlineData: {
                            mimeType: mimeType,
                            data: base64Data,
                        },
                    }],
            }],
            safetySettings: GEMINI_SAFETY,
        };

        console.debug(`${apiName} captioning request`, model, body);

        const result = await fetch(url, {
            body: JSON.stringify(body),
            method: 'POST',
            headers: headers,
        });

        if (!result.ok) {
            // Read the error as text: an error body is not guaranteed to be JSON, and a
            // parse failure here would hide the upstream status from the log.
            const errorText = await result.text();
            console.error(`${apiName} API returned error: ${result.status} ${result.statusText}`, errorText);
            return response.status(500).send({ error: true });
        }

        /** @type {any} */
        const data = await result.json();
        console.info(`${apiName} captioning response`, data);

        const candidates = data?.candidates;
        // Treat an empty list the same as a missing one instead of crashing on candidates[0].
        if (!candidates?.length) {
            return response.status(500).send('No candidates found, image was most likely filtered.');
        }

        // A candidate may come back without content/parts; fall through to "No caption found"
        // rather than throwing.
        const caption = candidates[0]?.content?.parts?.[0]?.text;
        if (!caption) {
            return response.status(500).send('No caption found');
        }

        return response.json({ caption });
    } catch (error) {
        console.error(error);
        response.status(500).send('Internal server error');
    }
});
/**
 * POST /list-voices
 * Responds with the `languages` export of google-translate-api-x as JSON.
 */
router.post('/list-voices', (_request, response) => response.json(languages));
/**
 * POST /generate-voice
 * Synthesizes `request.body.text` through google-translate-api-x's `speak` and sends the
 * decoded audio back with an 'audio/mpeg' content type. `request.body.voice` selects the
 * target language and falls back to 'en' when it is null or undefined.
 */
router.post('/generate-voice', async (request, response) => {
    // Decodes one base64 payload returned by `speak` into raw bytes.
    const decodeChunk = (chunk) => Buffer.from(chunk.toString(), 'base64');

    try {
        const { text } = request.body;
        const voice = request.body.voice ?? 'en';
        const spoken = await speak(text, { to: voice, forceBatch: false });

        let audio;
        if (Array.isArray(spoken)) {
            // Several chunks came back: decode each one and join them in order.
            audio = Buffer.concat(spoken.map((chunk) => new Uint8Array(decodeChunk(chunk))));
        } else {
            audio = decodeChunk(spoken);
        }

        response.setHeader('Content-Type', 'audio/mpeg');
        return response.send(audio);
    } catch (error) {
        console.error('Google Translate TTS generation failed', error);
        response.status(500).send('Internal server error');
    }
});
/**
 * POST /list-native-voices
 * Responds with `{ voices }`, a fixed list of Gemini native TTS voices. Every entry has
 * `name`, `voice_id` (same as the name), `lang` ('en-US') and a short `description`.
 */
router.post('/list-native-voices', async (_, response) => {
    try {
        // Hardcoded Gemini native TTS voices from official documentation
        // Source: https://ai.google.dev/gemini-api/docs/speech-generation#voices
        // Each row is [voice name, description].
        const voiceTable = [
            ['Zephyr', 'Bright'],
            ['Puck', 'Upbeat'],
            ['Charon', 'Informative'],
            ['Kore', 'Firm'],
            ['Fenrir', 'Excitable'],
            ['Leda', 'Youthful'],
            ['Orus', 'Firm'],
            ['Aoede', 'Breezy'],
            ['Callirhoe', 'Easy-going'],
            ['Autonoe', 'Bright'],
            ['Enceladus', 'Breathy'],
            ['Iapetus', 'Clear'],
            ['Umbriel', 'Easy-going'],
            ['Algieba', 'Smooth'],
            ['Despina', 'Smooth'],
            ['Erinome', 'Clear'],
            ['Algenib', 'Gravelly'],
            ['Rasalgethi', 'Informative'],
            ['Laomedeia', 'Upbeat'],
            ['Achernar', 'Soft'],
            ['Alnilam', 'Firm'],
            ['Schedar', 'Even'],
            ['Gacrux', 'Mature'],
            ['Pulcherrima', 'Forward'],
            ['Achird', 'Friendly'],
            ['Zubenelgenubi', 'Casual'],
            ['Vindemiatrix', 'Gentle'],
            ['Sadachbia', 'Lively'],
            ['Sadaltager', 'Knowledgeable'],
            ['Sulafat', 'Warm'],
        ];

        const voices = voiceTable.map(([name, description]) => ({
            name,
            voice_id: name,
            lang: 'en-US',
            description,
        }));

        return response.json({ voices });
    } catch (error) {
        console.error('Failed to return Google TTS voices:', error);
        response.sendStatus(500);
    }
});
/**
 * POST /generate-native-tts
 * Requests speech audio for `request.body.text` from the configured Google API using the
 * prebuilt voice named in `request.body.voice` and the model in `request.body.model`.
 *
 * Raw PCM responses (mime type containing 'audio/l16') are wrapped in a WAV header and
 * sent as 'audio/wav'; any other audio is forwarded with the mime type reported upstream.
 * Upstream failures are relayed with the upstream status code and a JSON `{ error }` body.
 */
router.post('/generate-native-tts', async (request, response) => {
    try {
        const { text, voice, model } = request.body;
        const { url, headers, apiName } = await getGoogleApiConfig(request, model);

        console.debug(`${apiName} TTS request`, { model, text, voice });

        const requestBody = {
            contents: [{
                role: 'user',
                parts: [{ text: text }],
            }],
            generationConfig: {
                responseModalities: ['AUDIO'],
                speechConfig: {
                    voiceConfig: {
                        prebuiltVoiceConfig: {
                            voiceName: voice,
                        },
                    },
                },
            },
            safetySettings: GEMINI_SAFETY,
        };

        const result = await fetch(url, {
            method: 'POST',
            headers: headers,
            body: JSON.stringify(requestBody),
        });

        if (!result.ok) {
            const errorText = await result.text();
            console.error(`${apiName} TTS API error: ${result.status} ${result.statusText}`, errorText);

            let errorMessage = 'TTS generation failed.';
            try {
                errorMessage = JSON.parse(errorText)?.error?.message || errorMessage;
            } catch {
                // The error body is not valid JSON (the raw text is already logged above);
                // keep the generic message so the upstream status is still relayed.
            }
            return response.status(result.status).json({ error: errorMessage });
        }

        /** @type {any} */
        const data = await result.json();
        const audioPart = data?.candidates?.[0]?.content?.parts?.[0];
        const audioData = audioPart?.inlineData?.data;
        const mimeType = audioPart?.inlineData?.mimeType;

        if (!audioData) {
            return response.status(500).json({ error: 'No audio data found in response' });
        }

        const audioBuffer = Buffer.from(audioData, 'base64');

        // If the audio is raw PCM, wrap it in a WAV header and send it.
        if (mimeType && mimeType.toLowerCase().includes('audio/l16')) {
            // The sample rate is taken from the mime type's "rate=" parameter, defaulting to 24000.
            const rateMatch = mimeType.match(/rate=(\d+)/);
            const sampleRate = rateMatch ? parseInt(rateMatch[1], 10) : 24000;

            // Create a complete, playable WAV file buffer and send it directly.
            const wavBuffer = createCompleteWavFile(audioBuffer, sampleRate);
            response.setHeader('Content-Type', 'audio/wav');
            return response.send(wavBuffer);
        }

        // Fallback for any other audio format Google might send in the future.
        response.setHeader('Content-Type', mimeType || 'application/octet-stream');
        response.send(audioBuffer);
    } catch (error) {
        console.error('Google TTS generation failed:', error);
        if (!response.headersSent) {
            return response.status(500).json({ error: 'Internal server error during TTS generation' });
        }
        return response.end();
    }
});
/**
 * POST /generate-image
 * Calls the `predict` endpoint of the configured Google API with `request.body.prompt`
 * and responds with `{ image }`, the base64 payload of the first prediction.
 *
 * Several parameters are only sent when the request targets Vertex AI, and some are
 * omitted for models whose name starts with 'imagegeneration'. Every failure is reported
 * to the client as a bare 500 status.
 */
router.post('/generate-image', async (request, response) => {
    try {
        const model = request.body.model || 'imagen-3.0-generate-002';
        const { url, headers, apiName } = await getGoogleApiConfig(request, model, 'predict');

        // AI Studio is stricter than Vertex AI.
        const isVertex = request.body.api === 'vertexai';
        // Is it even worth it?
        const isDeprecated = model.startsWith('imagegeneration');

        // Vertex-only parameters stay undefined for AI Studio so they drop out of the JSON body.
        const seed = isVertex ? Number(request.body.seed ?? Math.floor(Math.random() * 1000000)) : undefined;
        const enhancePrompt = isVertex ? Boolean(request.body.enhance ?? false) : undefined;
        const negativePrompt = isVertex ? (request.body.negative_prompt || undefined) : undefined;

        // Parameters that are left out for the 'imagegeneration' model family.
        let personGeneration;
        let safetySetting;
        if (!isDeprecated) {
            personGeneration = 'allow_all';
            safetySetting = isVertex ? 'block_only_high' : 'block_low_and_above';
        }

        const parameters = {
            sampleCount: 1,
            seed,
            enhancePrompt,
            negativePrompt,
            aspectRatio: String(request.body.aspect_ratio || '1:1'),
            personGeneration,
            language: isVertex ? 'auto' : undefined,
            safetySetting,
            addWatermark: isVertex ? false : undefined,
            outputOptions: {
                mimeType: 'image/jpeg',
                compressionQuality: 100,
            },
        };
        const requestBody = {
            instances: [{
                prompt: request.body.prompt || '',
            }],
            parameters,
        };

        console.debug(`${apiName} image generation request:`, model, requestBody);

        const result = await fetch(url, {
            method: 'POST',
            headers: headers,
            body: JSON.stringify(requestBody),
        });

        if (!result.ok) {
            const errorText = await result.text();
            console.warn(`${apiName} image generation error: ${result.status} ${result.statusText}`, errorText);
            return response.sendStatus(500);
        }

        /** @type {any} */
        const data = await result.json();
        const imagePart = data?.predictions?.[0]?.bytesBase64Encoded;

        if (imagePart) {
            return response.send({ image: imagePart });
        }

        console.warn(`${apiName} image generation error: No image data found in response`);
        return response.sendStatus(500);
    } catch (error) {
        console.error('Google Image generation failed:', error);
        if (response.headersSent) {
            return response.end();
        }
        return response.sendStatus(500);
    }
});