Spaces:
Running
Running
import fetch from 'node-fetch'; | |
import express from 'express'; | |
import { decode } from 'html-entities'; | |
import { readSecret, SECRET_KEYS } from './secrets.js'; | |
import { trimV1 } from '../util.js'; | |
import { setAdditionalHeaders } from '../additional-headers.js'; | |
// Express router that the web-search endpoints in this module are registered on.
const router = express.Router();
export { router };
/**
 * Headers attached to outbound page requests so they look like a document
 * navigation performed by a desktop Chrome browser.
 * Key order is kept as-is on purpose.
 */
const visitHeaders = {
    Accept: 'text/html',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    Connection: 'keep-alive',
    'Cache-Control': 'no-cache',
    Pragma: 'no-cache',
    TE: 'trailers',
    DNT: '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
};
/**
 * Extract the transcript of a YouTube video.
 *
 * Finds the JSON that follows `"captions":` in the watch page, selects a caption
 * track (the one whose `languageCode` equals `lang`, or the first listed track when
 * `lang` is not given), downloads that track's XML and joins the entity-decoded
 * cue texts with single spaces.
 *
 * @param {string} videoPageBody HTML of the video page
 * @param {string} [lang] Language code; when falsy the first caption track is used
 * @returns {Promise<string>} Transcript text
 * @throws {Error} 'Too many requests' when the page has no captions data and shows a reCAPTCHA
 * @throws {Error} 'Video is not available' when the page has no captions data and no playability status
 * @throws {Error} 'Transcript not available' when no caption data or no caption track is present
 * @throws {Error} 'Transcript disabled' when the captions JSON cannot be parsed or has no track list renderer
 * @throws {Error} 'Transcript not available in this language' when no track matches `lang`
 * @throws {Error} 'Transcript request failed' when the transcript download is not OK
 */
async function extractTranscript(videoPageBody, lang) {
    const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;

    const splittedHTML = videoPageBody.split('"captions":');
    if (splittedHTML.length <= 1) {
        if (videoPageBody.includes('class="g-recaptcha"')) {
            throw new Error('Too many requests');
        }
        if (!videoPageBody.includes('"playabilityStatus":')) {
            throw new Error('Video is not available');
        }
        throw new Error('Transcript not available');
    }

    let captions;
    try {
        // Strip every newline (a string pattern would only remove the first one)
        // so raw line breaks in the embedded JSON cannot break parsing.
        const captionsJson = splittedHTML[1].split(',"videoDetails')[0].replace(/\n/g, '');
        captions = JSON.parse(captionsJson)?.['playerCaptionsTracklistRenderer'];
    } catch {
        // Unparseable captions data is reported as "disabled" below.
        captions = undefined;
    }

    if (!captions) {
        throw new Error('Transcript disabled');
    }

    // A missing, malformed or empty track list all mean there is nothing to download.
    const tracks = captions.captionTracks;
    if (!Array.isArray(tracks) || tracks.length === 0) {
        throw new Error('Transcript not available');
    }

    const track = lang
        ? tracks.find((candidate) => candidate.languageCode === lang)
        : tracks[0];
    if (!track) {
        throw new Error('Transcript not available in this language');
    }

    const transcriptResponse = await fetch(track.baseUrl, {
        headers: {
            ...(lang && { 'Accept-Language': lang }),
            'User-Agent': visitHeaders['User-Agent'],
        },
    });

    if (!transcriptResponse.ok) {
        throw new Error('Transcript request failed');
    }

    const transcriptBody = await transcriptResponse.text();

    // The text is double-encoded, hence the two decode passes per cue.
    return Array.from(transcriptBody.matchAll(RE_XML_TRANSCRIPT), (match) => decode(decode(match[3]))).join(' ');
}
/**
 * POST /serpapi
 * Forwards `request.body.query` to SerpApi using the stored SerpApi secret.
 * Responds 400 when no key is stored, 500 with the upstream body when SerpApi
 * answers with a non-OK status, and the parsed JSON result otherwise.
 */
router.post('/serpapi', async (request, response) => {
    try {
        const apiKey = readSecret(request.user.directories, SECRET_KEYS.SERPAPI);
        if (!apiKey) {
            console.error('No SerpApi key found');
            return response.sendStatus(400);
        }

        const query = request.body.query;
        const endpoint = `https://serpapi.com/search.json?q=${encodeURIComponent(query)}&api_key=${apiKey}`;
        const upstream = await fetch(endpoint);
        console.debug('SerpApi query', query);

        if (upstream.ok) {
            const payload = await upstream.json();
            console.debug('SerpApi response', payload);
            return response.json(payload);
        }

        const failureText = await upstream.text();
        console.error('SerpApi request failed', upstream.statusText, failureText);
        return response.status(500).send(failureText);
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
/**
 * Get the transcript of a YouTube video
 *
 * POST /transcript
 * Body: `id` (required video id), `lang` (optional language code), `json` (optional flag).
 * With `json` set, responds with `{ transcript, html }`; if extraction fails the
 * transcript is an empty string and the page HTML is still returned.
 * Without `json`, responds with the transcript text, or 500 when extraction fails.
 * Responds 400 when `id` is missing.
 * @copyright https://github.com/Kakulukian/youtube-transcript (MIT License)
 */
router.post('/transcript', async (request, response) => {
    try {
        const { id, lang, json } = request.body;

        if (!id) {
            console.error('Id is required for /transcript');
            return response.sendStatus(400);
        }

        // Set the id through searchParams so it is escaped and cannot inject
        // additional query parameters into the watch URL.
        const videoPageUrl = new URL('https://www.youtube.com/watch');
        videoPageUrl.searchParams.set('v', id);

        const videoPageResponse = await fetch(videoPageUrl, {
            headers: {
                ...(lang && { 'Accept-Language': lang }),
                'User-Agent': visitHeaders['User-Agent'],
            },
        });

        const videoPageBody = await videoPageResponse.text();

        try {
            const transcriptText = await extractTranscript(videoPageBody, lang);
            return json
                ? response.json({ transcript: transcriptText, html: videoPageBody })
                : response.send(transcriptText);
        } catch (error) {
            if (json) {
                // JSON callers still get the page HTML; log the reason instead of dropping it.
                console.warn('Transcript extraction failed', error);
                return response.json({ html: videoPageBody, transcript: '' });
            }
            throw error;
        }
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
/**
 * POST /searxng
 * Runs a search against the SearXNG instance at `request.body.baseUrl`.
 * The instance's main page is requested first (and the client stylesheet it
 * links to, when one is found) before the `/search` request is issued.
 * Responds 400 when `baseUrl` or `query` is missing, 500 on any upstream
 * failure, and otherwise the raw text of the search response.
 */
router.post('/searxng', async (request, response) => {
    try {
        const { baseUrl, query, preferences, categories } = request.body;

        const hasRequiredParams = Boolean(baseUrl) && Boolean(query);
        if (!hasRequiredParams) {
            console.error('Missing required parameters for /searxng');
            return response.sendStatus(400);
        }

        console.debug('SearXNG query', baseUrl, query);

        // Load the landing page the same way a browser would.
        const landingUrl = new URL(baseUrl);
        const landingResponse = await fetch(landingUrl, { headers: visitHeaders });
        if (!landingResponse.ok) {
            console.error('SearXNG request failed', landingResponse.statusText);
            return response.sendStatus(500);
        }

        // Request the linked client stylesheet too, if the page references one.
        const landingHtml = await landingResponse.text();
        const stylesheetMatch = landingHtml.match(/href="(\/client.+\.css)"/);
        const stylesheetHref = stylesheetMatch?.[1];
        if (stylesheetHref) {
            await fetch(new URL(stylesheetHref, baseUrl), { headers: visitHeaders });
        }

        const params = new URLSearchParams();
        params.append('q', query);
        if (preferences) {
            params.append('preferences', preferences);
        }
        if (categories) {
            params.append('categories', categories);
        }

        const searchUrl = new URL('/search', baseUrl);
        searchUrl.search = params.toString();

        const searchResponse = await fetch(searchUrl, { headers: visitHeaders });
        if (searchResponse.ok) {
            const resultText = await searchResponse.text();
            return response.send(resultText);
        }

        const failureText = await searchResponse.text();
        console.error('SearXNG request failed', searchResponse.statusText, failureText);
        return response.sendStatus(500);
    } catch (error) {
        console.error('SearXNG request failed', error);
        return response.sendStatus(500);
    }
});
/**
 * POST /tavily
 * Sends `request.body.query` to the Tavily search API with the stored Tavily secret.
 * `include_images` from the request body is coerced to a boolean; every other
 * search option is fixed. Responds 400 when no key is stored, 500 with the
 * upstream body on a non-OK answer, and the parsed JSON result otherwise.
 */
router.post('/tavily', async (request, response) => {
    try {
        const tavilyKey = readSecret(request.user.directories, SECRET_KEYS.TAVILY);
        if (!tavilyKey) {
            console.error('No Tavily key found');
            return response.sendStatus(400);
        }

        const query = request.body.query;
        const wantsImages = Boolean(request.body.include_images);

        const payload = {
            query,
            api_key: tavilyKey,
            search_depth: 'basic',
            topic: 'general',
            include_answer: true,
            include_raw_content: false,
            include_images: wantsImages,
            include_image_descriptions: false,
            include_domains: [],
            max_results: 10,
        };

        const upstream = await fetch('https://api.tavily.com/search', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload),
        });
        console.debug('Tavily query', query);

        if (upstream.ok) {
            const searchData = await upstream.json();
            console.debug('Tavily response', searchData);
            return response.json(searchData);
        }

        const failureText = await upstream.text();
        console.error('Tavily request failed', upstream.statusText, failureText);
        return response.status(500).send(failureText);
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
/**
 * POST /koboldcpp
 * Posts `{ q: query }` to `<url>/api/extra/websearch`, where `url` comes from the
 * request body and is passed through `trimV1` first. Responds 400 when `url`
 * is missing, 500 with the upstream body on a non-OK answer, and the parsed
 * JSON result otherwise.
 */
router.post('/koboldcpp', async (request, response) => {
    try {
        const query = request.body.query;
        const url = request.body.url;

        if (!url) {
            console.error('No URL provided for KoboldCpp search');
            return response.sendStatus(400);
        }

        console.debug('KoboldCpp search query', query);

        const serverUrl = trimV1(url);
        const fetchOptions = {
            method: 'POST',
            headers: {},
            body: JSON.stringify({ q: query }),
        };
        // Gives the shared helper a chance to add headers for this server.
        setAdditionalHeaders(request, fetchOptions, serverUrl);

        const upstream = await fetch(`${serverUrl}/api/extra/websearch`, fetchOptions);
        if (upstream.ok) {
            const searchData = await upstream.json();
            console.debug('KoboldCpp search response', searchData);
            return response.json(searchData);
        }

        const failureText = await upstream.text();
        console.error('KoboldCpp request failed', upstream.statusText, failureText);
        return response.status(500).send(failureText);
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
/**
 * POST /serper
 * Sends `request.body.query` to Serper with the stored Serper secret, using
 * the images endpoint when `request.body.images` is truthy and the regular
 * search endpoint otherwise. Responds 400 when no key is stored, 500 with the
 * upstream body on a non-OK answer, and the parsed JSON result otherwise.
 */
router.post('/serper', async (request, response) => {
    try {
        const serperKey = readSecret(request.user.directories, SECRET_KEYS.SERPER);
        if (!serperKey) {
            console.error('No Serper key found');
            return response.sendStatus(400);
        }

        const query = request.body.query;
        const images = request.body.images;

        let endpoint;
        if (images) {
            endpoint = 'https://google.serper.dev/images';
        } else {
            endpoint = 'https://google.serper.dev/search';
        }

        const fetchOptions = {
            method: 'POST',
            headers: {
                'X-API-KEY': serperKey,
                'Content-Type': 'application/json',
            },
            redirect: 'follow',
            body: JSON.stringify({ q: query }),
        };
        const upstream = await fetch(endpoint, fetchOptions);
        console.debug('Serper query', query);

        if (upstream.ok) {
            const searchData = await upstream.json();
            console.debug('Serper response', searchData);
            return response.json(searchData);
        }

        const failureText = await upstream.text();
        console.warn('Serper request failed', upstream.statusText, failureText);
        return response.status(500).send(failureText);
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
/**
 * POST /visit
 * Fetches the page at `request.body.url` with browser-like headers and relays it.
 *
 * Only absolute http(s) URLs on the default port whose host is not an IP
 * literal (IPv4 or bracketed IPv6) are accepted; anything else gets a 400.
 * When `request.body.html` is truthy or omitted, the upstream content type must
 * include `text/html` and the body is sent as text. Otherwise the upstream
 * content type is copied to the response and the raw bytes are sent.
 * Upstream failures and unexpected errors yield a 500.
 */
router.post('/visit', async (request, response) => {
    try {
        const url = request.body.url;
        const html = Boolean(request.body.html ?? true);

        if (!url) {
            console.error('No url provided for /visit');
            return response.sendStatus(400);
        }

        let targetUrl;
        try {
            // The URL constructor throws on relative or otherwise unparseable input,
            // so no separate check for a missing protocol or host is needed.
            const urlObj = new URL(url);

            // Reject non-HTTP URLs
            if (urlObj.protocol !== 'http:' && urlObj.protocol !== 'https:') {
                throw new Error('Invalid protocol');
            }

            // Reject URLs with a non-standard port
            if (urlObj.port !== '') {
                throw new Error('Invalid port');
            }

            // Reject IP addresses: dotted IPv4, and IPv6 literals, which the
            // URL parser exposes wrapped in square brackets.
            const isIPv4 = /^\d+\.\d+\.\d+\.\d+$/.test(urlObj.hostname);
            const isIPv6 = urlObj.hostname.startsWith('[');
            if (isIPv4 || isIPv6) {
                throw new Error('Invalid hostname');
            }

            // Fetch exactly what was validated rather than the raw body value.
            targetUrl = urlObj.href;
        } catch (error) {
            console.error('Invalid url provided for /visit', url, error?.message);
            return response.sendStatus(400);
        }

        console.info('Visiting web URL', url);

        const result = await fetch(targetUrl, { headers: visitHeaders });

        if (!result.ok) {
            console.error(`Visit failed ${result.status} ${result.statusText}`);
            return response.sendStatus(500);
        }

        const contentType = String(result.headers.get('content-type'));

        if (html) {
            if (!contentType.includes('text/html')) {
                console.error(`Visit failed, content-type is ${contentType}, expected text/html`);
                return response.sendStatus(500);
            }
            const text = await result.text();
            return response.send(text);
        }

        // Binary mode: pass the upstream content type and bytes through unchanged.
        response.setHeader('Content-Type', contentType);
        const buffer = await result.arrayBuffer();
        return response.send(Buffer.from(buffer));
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});