-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.js
More file actions
88 lines (76 loc) · 2.64 KB
/
Copy pathserver.js
File metadata and controls
88 lines (76 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
const express = require('express');
const path = require('path');
const systemPrompt = require('./prompt');
// Create the Express application instance used by every route in this file.
const app = express();

// Parse JSON request bodies; payloads larger than 1 MB are rejected by the parser.
const jsonBodyParser = express.json({ limit: '1mb' });
app.use(jsonBodyParser);

// Serve static assets from the "public" directory located next to this file.
const publicDir = path.join(__dirname, 'public');
app.use(express.static(publicDir));
/**
 * POST /api/chat
 *
 * Forwards a chat conversation to the NVIDIA chat-completions endpoint and
 * streams the upstream response body back to the client as
 * `text/event-stream`.
 *
 * Request body: `{ messages: Array }` — a non-empty array. Only the last 20
 * entries are forwarded, preceded by the system prompt.
 *
 * Responses:
 *   - 400 when `messages` is missing, not an array, or empty.
 *   - 429 when the last upstream status seen was 429.
 *   - 502 when no model produced a successful response or an unexpected
 *     error occurred before any data was sent.
 *   - otherwise the upstream stream, relayed chunk by chunk.
 */
app.post('/api/chat', async (req, res) => {
  // Fall back to an empty object so a missing/unparsed body yields the 400
  // below instead of a destructuring TypeError.
  const { messages } = req.body ?? {};
  if (!Array.isArray(messages) || messages.length === 0) {
    return res.status(400).json({ error: 'Nachrichten-Array erforderlich.' });
  }

  // Cap the forwarded history at the 20 most recent messages.
  const trimmed = messages.slice(-20);
  const fullMessages = [
    { role: 'system', content: systemPrompt },
    ...trimmed,
  ];

  // Models are tried in this order; the first successful response wins.
  const MODELS = ['qwen/qwen3.5-122b-a10b', 'mistralai/mistral-large'];

  // Abort all upstream work as soon as the client goes away. The listener is
  // attached to the response (not the request) and before the first await:
  // the request body has already been consumed by the JSON parser, so the
  // request's own 'close' event is not a dependable disconnect signal here.
  const upstreamAbort = new AbortController();
  res.on('close', () => {
    // 'close' also fires after a normal res.end(); only abort on early close.
    if (!res.writableEnded) {
      upstreamAbort.abort();
    }
  });

  try {
    let response;
    let lastStatus;
    for (const model of MODELS) {
      try {
        response = await fetch('https://integrate.api.nvidia.com/v1/chat/completions', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${process.env.NVIDIA_API_KEY}`,
          },
          body: JSON.stringify({
            model,
            messages: fullMessages,
            stream: true,
            max_tokens: 4096,
          }),
          signal: upstreamAbort.signal,
        });
        if (response.ok) {
          console.log(`Using model: ${model}`);
          break;
        }
        lastStatus = response.status;
        // Release the failed response's connection. Best effort only: a
        // failure here must not overwrite the status recorded above.
        await response.body?.cancel().catch(() => {});
        // Rate limiting ends the attempt immediately; no fallback is tried.
        if (response.status === 429) break;
        console.warn(`Model ${model} returned ${response.status}, trying next...`);
      } catch (err) {
        // The client disconnected: nobody is left to answer, so stop here.
        if (upstreamAbort.signal.aborted) return;
        lastStatus = 0;
        console.warn(`Model ${model} fetch failed: ${err.message}, trying next...`);
      }
    }

    if (!response || !response.ok) {
      if (lastStatus === 429) {
        return res.status(429).json({ error: 'Zu viele Anfragen — kurz warten und nochmal versuchen.' });
      }
      return res.status(502).json({ error: 'Server gerade nicht erreichbar. Versuch\'s in ein paar Sekunden nochmal.' });
    }

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // stream: true keeps multi-byte characters split across chunks intact.
        res.write(decoder.decode(value, { stream: true }));
      }
      // Flush whatever the decoder is still buffering at end of stream.
      const tail = decoder.decode();
      if (tail) {
        res.write(tail);
      }
    } finally {
      res.end();
    }
  } catch (err) {
    // An abort means the client is gone; there is nothing to report or send.
    if (upstreamAbort.signal.aborted) return;
    console.error(`Chat request failed: ${err.message}`);
    if (!res.headersSent) {
      res.status(502).json({ error: 'Server gerade nicht erreichbar. Versuch\'s in ein paar Sekunden nochmal.' });
    }
  }
});
// Listen on the port given by the PORT environment variable, or 3000 when it
// is unset or empty.
const { PORT: envPort } = process.env;
const PORT = envPort ? envPort : 3000;

// Start the HTTP server and log the port once it is listening.
app.listen(PORT, () => {
  console.log(`DateDecoder running on port ${PORT}`);
});