Files
gyanBuddy/server.js
T
Gitea 9f44ef4ac5
Deploy Node App / deploy (push) Successful in 15s
yml change
2026-06-05 11:49:04 +05:30

301 lines
9.6 KiB
JavaScript

require("dotenv").config();
const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");
/**
 * Runtime configuration for the server.
 *
 * - `azure`:  endpoint, deployment name, API version and key used to build the
 *             chat-completions client.
 * - `qdrant`: URL of the vector store and the collection that is searched.
 * - `search`: retrieval tuning used by `searchQdrant` (`topK` is the search
 *             limit, `minScore` the score threshold, `maxContextDocs` the cap
 *             on hits kept after sorting).
 * - `port`:   port passed to `app.listen`.
 *
 * Environment variables take precedence; the literals below are fallbacks.
 */
const CONFIG = (() => {
  // `||` semantics: an unset OR empty-string variable falls back to the default.
  const envOr = (name, fallback) => process.env[name] || fallback;

  const azure = {
    endpoint: envOr("AZURE_OPENAI_ENDPOINT", "https://cpmindiayoda-resource.services.ai.azure.com"),
    deployment: envOr("AZURE_DEPLOYMENT", "gpt-4o-mini"),
    apiVersion: envOr("AZURE_API_VERSION", "2024-08-01-preview"),
    // No fallback: stays undefined when the variable is not set.
    apiKey: process.env.AZURE_OPENAI_KEY,
  };

  const qdrant = {
    url: envOr("QDRANT_URL", "http://20.40.61.65:6333"),
    collection: envOr("QDRANT_COLLECTION", "pdf_rag"),
  };

  const search = {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  };

  // PORT arrives as a string when set through the environment; the fallback is a number.
  return { azure, qdrant, search, port: envOr("PORT", 5000) };
})();
// ─── Clients ──────────────────────────────────────────────────────────────────
/**
 * Chat-completions client.
 *
 * The generic OpenAI SDK client is pointed at the Azure deployment URL. The
 * key is supplied both as `apiKey` and as an explicit `api-key` header, and the
 * API version is appended to every request as a query parameter.
 */
const llm = (() => {
  const { endpoint, deployment, apiKey, apiVersion } = CONFIG.azure;
  return new OpenAI({
    baseURL: `${endpoint}/openai/deployments/${deployment}`,
    apiKey,
    defaultHeaders: { "api-key": apiKey },
    defaultQuery: { "api-version": apiVersion },
  });
})();

/**
 * Vector-store client used by `searchQdrant`.
 *
 * NOTE(review): `checkCompatibility: false` presumably skips the client's
 * server-version check and `timeout` is presumably in milliseconds — confirm
 * against the @qdrant/js-client-rest documentation.
 */
const qdrant = new QdrantClient({
  url: CONFIG.qdrant.url,
  checkCompatibility: false,
  timeout: 30_000,
});
// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
// Resolved feature-extraction pipeline, or null until the first load finishes.
let _embedder = null;
// Promise for a load that is currently in progress; null when no load is running.
let _embedderLoading = null;

/**
 * Returns the shared MiniLM feature-extraction pipeline, loading it on first use.
 *
 * Callers that arrive while the first load is still running share that single
 * load instead of each starting their own `pipeline()` call. A failed load is
 * not cached: the in-flight promise is cleared once it settles, so the next
 * call retries.
 *
 * @returns {Promise<Function>} The pipeline returned by `pipeline("feature-extraction", ...)`.
 * @throws Whatever `pipeline()` rejects with when the model cannot be loaded.
 */
async function getEmbedder() {
  if (_embedder) {
    return _embedder;
  }

  if (!_embedderLoading) {
    console.log("Loading MiniLM model...");
    _embedderLoading = pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2")
      .then((model) => {
        _embedder = model;
        console.log("Embedding model ready");
        return model;
      })
      .finally(() => {
        // Success is remembered in _embedder; failure must not be remembered at all.
        _embedderLoading = null;
      });
  }

  return _embedderLoading;
}
/**
 * Turns a piece of text into an embedding vector.
 *
 * The shared model from `getEmbedder()` is invoked with mean pooling and
 * normalization enabled, and the `data` of its output is copied into a plain
 * JavaScript array.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding as a plain array.
 */
async function createEmbedding(text) {
  const embed = await getEmbedder();
  const { data } = await embed(text, { pooling: "mean", normalize: true });
  return Array.from(data);
}
// ─── Qdrant search ────────────────────────────────────────────────────────────
/**
 * Runs a vector search against the configured Qdrant collection and returns
 * the best hits, highest score first.
 *
 * Each option falls back to its own value in `CONFIG.search`, so callers may
 * override just one of them (e.g. `{ topK: 5 }`) without losing the score
 * threshold or the result cap.
 *
 * @param {number[]} embedding - Query vector.
 * @param {object} [options]
 * @param {number} [options.topK] - Maximum number of hits requested from Qdrant.
 * @param {number} [options.minScore] - Passed to Qdrant as `score_threshold`.
 * @param {number} [options.maxContextDocs] - Maximum number of hits returned.
 * @returns {Promise<object[]>} Hits (with payload) sorted by descending score.
 */
async function searchQdrant(
  embedding,
  {
    topK = CONFIG.search.topK,
    minScore = CONFIG.search.minScore,
    maxContextDocs = CONFIG.search.maxContextDocs,
  } = {},
) {
  const hits = await qdrant.search(CONFIG.qdrant.collection, {
    vector: embedding,
    limit: topK,
    with_payload: true,
    score_threshold: minScore,
  });

  // Sort a copy so the array handed back by the client is left untouched.
  return [...hits]
    .sort((a, b) => b.score - a.score)
    .slice(0, maxContextDocs);
}
// ─── Build LLM context string ─────────────────────────────────────────────────
/**
 * Formats search hits into the context string that is sent to the LLM.
 *
 * Every hit becomes a numbered section: a header line with the source file and
 * page, followed by the chunk text. Missing payload fields fall back to
 * "unknown", "?" and an empty string respectively. Sections are separated by a
 * `---` rule surrounded by blank lines.
 *
 * @param {object[]} results - Hits whose `payload` may carry `file`, `page`, `text`.
 * @returns {string} The joined context, or "" for an empty list.
 */
function buildContext(results) {
  const SECTION_DIVIDER = "\n\n---\n\n";

  const toSection = (hit, position) => {
    const payload = hit.payload;
    const file = payload?.file ?? "unknown";
    const page = payload?.page ?? "?";
    const text = payload?.text ?? "";
    return `[${position + 1}] File: ${file} | Page: ${page}\n${text}`;
  };

  const sections = results.map(toSection);
  return sections.join(SECTION_DIVIDER);
}
// ─── LLM call ─────────────────────────────────────────────────────────────────
/**
 * System message sent with every chat-completion request.
 *
 * It restricts answers to the supplied information, fixes the exact refusal
 * sentence, and prescribes the markdown layout of the answer. The lines are
 * joined with "\n" and carry no leading or trailing whitespace.
 */
const SYSTEM_PROMPT = [
  "You are CPM AI Assistant.",
  "Rules:",
  "- Answer only from the provided information.",
  "- If the answer is not available, reply exactly:",
  '"❌ I could not find this information in the uploaded documents."',
  "- Do not make up information.",
  "- Do not mention documents, context, or chunks.",
  "Response Style:",
  "- Use simple English.",
  "- Keep answers short and clear.",
  "- Use headings and bullet points.",
  "- Highlight important words in **bold**.",
  "- Use emojis in headings.",
  "Format:",
  "# 📋 Topic",
  "## 🎯 Summary",
  "Short answer in 1-2 sentences.",
  "## ✅ Details",
  "- Point 1",
  "- Point 2",
  "- Point 3",
  "## ⚠️ Notes",
  "- Extra information (if available).",
].join("\n");
/**
 * Sends one non-streaming chat-completion request and returns the answer text.
 *
 * The request carries the system prompt plus a single user message that
 * embeds the retrieved context followed by the question.
 *
 * @param {string} question - The user's question.
 * @param {string} context - Context string produced by `buildContext`.
 * @returns {Promise<string>} `message.content` of the first choice.
 */
async function askLLM(question, context) {
  const userMessage = `Context:\n${context}\n\nQuestion:\n${question}`;
  const messages = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: userMessage },
  ];

  const { choices } = await llm.chat.completions.create({
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages,
  });

  return choices[0].message.content;
}
// ─── Express app ──────────────────────────────────────────────────────────────
// The Express application; middleware below runs in registration order.
const app = express();

// CORS first, then body parsers: JSON bodies are limited to 1mb, and
// URL-encoded bodies are parsed with the extended option enabled.
const bodyAndCorsMiddleware = [
  cors(),
  express.json({ limit: "1mb" }),
  express.urlencoded({ extended: true }),
];
for (const middleware of bodyAndCorsMiddleware) {
  app.use(middleware);
}

// Minimal request log: one line with the HTTP method and path per request.
app.use(function logRequest(req, _res, next) {
  const { method, path } = req;
  console.log(`→ ${method} ${path}`);
  next();
});
// ─── Routes ───────────────────────────────────────────────────────────────────
// GET /health — returns a static "ok" status plus the configured LLM
// deployment and Qdrant collection names. It does not call either backend.
app.get("/health", (_req, res) => {
  const { deployment: model } = CONFIG.azure;
  const { collection } = CONFIG.qdrant;
  res.json({ status: "ok", model, collection });
});
/**
 * POST /ask — answers a question from the indexed documents in one response.
 *
 * Request body: `{ question: string }`.
 * Responses:
 *   400 `{ success: false, error }`  when `question` is missing, blank or not a string.
 *   200 `{ success: true, question, answer, sources, ms }` otherwise; when the
 *       search returns no hits, `answer` is the fixed not-found sentence and
 *       `sources` is empty.
 *   500 `{ success: false, error }`  when embedding, search or the LLM call throws.
 */
app.post("/ask", async (req, res) => {
  const { question } = req.body ?? {};

  // The typeof check matters: for a non-string value (e.g. a number) `.trim`
  // is not a function, and the resulting TypeError would be thrown outside the
  // try block, leaving this async handler with an unhandled rejection.
  if (typeof question !== "string" || !question.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();
  const elapsedMs = () => Date.now() - t0;

  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      // Nothing relevant was retrieved, so the LLM is not called at all.
      return res.json({
        success: true,
        question,
        answer: "❌ I could not find this information in the uploaded documents.",
        sources: [],
        ms: elapsedMs(),
      });
    }

    const context = buildContext(results);
    const answer = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: results.map((r) => ({
        // Unary plus turns the 4-decimal string back into a number.
        score: +r.score.toFixed(4),
        file: r.payload?.file,
        page: r.payload?.page,
        chunk: r.payload?.chunk,
      })),
      ms: elapsedMs(),
    });
  } catch (err) {
    console.error("❌ /ask error:", err);
    return res.status(500).json({ success: false, error: err.message });
  }
});
/**
 * POST /ask/stream — same pipeline as /ask, delivered as Server-Sent Events.
 *
 * Request body: `{ question: string }`; a missing, blank or non-string
 * question gets a plain JSON 400 before any SSE headers are sent.
 *
 * Events written to the stream:
 *   status  `{ message }`         progress notes
 *   sources `{ sources }`         retrieved hits, sent before generation starts
 *   token   `{ token, isWord }`   answer pieces; whitespace runs are sent as
 *                                 their own tokens with `isWord: false`
 *   done    `{ sources }`         end of a successful answer
 *   error   `{ error }`           any failure after the headers were flushed
 */
app.post("/ask/stream", async (req, res) => {
  const { question } = req.body ?? {};

  // The typeof check matters: for a non-string value `.trim` is not a function
  // and the TypeError would escape this async handler as an unhandled rejection.
  if (typeof question !== "string" || !question.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders();

  // Set once the response has closed, which includes the client disconnecting
  // mid-answer; used below to stop pulling tokens nobody will receive.
  let clientClosed = false;
  res.on("close", () => {
    clientClosed = true;
  });

  const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);

  try {
    send("status", { message: "🔍 Searching documents..." });
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      // Emit the fixed not-found sentence one word per token event.
      // NOTE(review): unlike the LLM path below, no whitespace tokens are sent
      // here — confirm the client renders this branch as intended.
      const notFoundMessage = "❌ I could not find this information in the uploaded documents.";
      for (const word of notFoundMessage.split(" ")) {
        send("token", { token: word, isWord: true });
      }
      send("done", { sources: [] });
      return res.end();
    }

    const sources = results.map((r) => ({
      // Unary plus turns the 4-decimal string back into a number.
      score: +r.score.toFixed(4),
      file: r.payload?.file,
      page: r.payload?.page,
      chunk: r.payload?.chunk,
    }));
    send("sources", { sources });
    send("status", { message: "💬 Generating answer..." });

    const context = buildContext(results);
    const stream = await llm.chat.completions.create({
      model: CONFIG.azure.deployment,
      temperature: 0,
      max_tokens: 1500,
      stream: true,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
      ],
    });

    // Model deltas can end mid-word, so text is buffered and only complete
    // words (and the whitespace between them) are forwarded.
    let wordBuffer = "";
    for await (const chunk of stream) {
      // Leaving the loop stops consuming the LLM stream once the client is gone.
      if (clientClosed) break;

      const rawToken = chunk.choices[0]?.delta?.content ?? "";
      if (!rawToken) continue;

      wordBuffer += rawToken;
      // The capturing group keeps whitespace runs as their own array entries,
      // so newlines and markdown spacing reach the client unchanged.
      const parts = wordBuffer.split(/(\s+)/);
      // The last entry may be an unfinished word — hold it back for the next delta.
      wordBuffer = parts.pop() ?? "";
      for (const part of parts) {
        if (part) {
          send("token", { token: part, isWord: /\S/.test(part) });
        }
      }
    }

    if (!clientClosed) {
      // Flush whatever is still buffered, then signal completion.
      if (wordBuffer) {
        send("token", { token: wordBuffer, isWord: true });
      }
      send("done", { sources });
    }
  } catch (err) {
    console.error("❌ /ask/stream error:", err);
    send("error", { error: err.message });
  }

  res.end();
});
// Fallback for any request that no route above handled: JSON 404.
app.use(function notFound(_req, res) {
  return res.status(404).json({ success: false, error: "Not found" });
});
// ─── Start ────────────────────────────────────────────────────────────────────
/**
 * Boots the server: the embedding model is loaded first, and only then does
 * the HTTP listener start on `CONFIG.port`.
 *
 * @returns {Promise<void>} Rejects if loading the embedding model fails.
 */
async function start() {
  await getEmbedder();

  const announce = () => {
    console.log(`Server running on port ${CONFIG.port}`);
  };
  app.listen(CONFIG.port, announce);
}

// A failed startup is logged and ends the process with exit code 1.
start().catch((startupError) => {
  console.error("Fatal startup error:", startupError);
  process.exit(1);
});