/**
 * RAG question-answering server.
 *
 * Flow for both endpoints: embed the question locally (MiniLM), run a vector
 * search against Qdrant, build a context string from the hits, and ask the
 * Azure OpenAI deployment to answer from that context only.
 *
 * Endpoints:
 *   GET  /health      – basic status info
 *   POST /ask         – JSON answer
 *   POST /ask/stream  – Server-Sent Events (status, sources, token, done, error)
 */
require("dotenv").config();

const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");

// ─── Configuration ────────────────────────────────────────────────────────────

// `||` (not `??`) is deliberate for env values: an empty string in .env should
// fall back to the default as well.
const CONFIG = {
  azure: {
    endpoint:
      process.env.AZURE_OPENAI_ENDPOINT ||
      "https://cpmindiayoda-resource.services.ai.azure.com",
    deployment: process.env.AZURE_DEPLOYMENT || "gpt-4o-mini",
    apiVersion: process.env.AZURE_API_VERSION || "2024-08-01-preview",
    apiKey: process.env.AZURE_OPENAI_KEY,
  },
  qdrant: {
    url: process.env.QDRANT_URL || "http://172.236.181.119:6333",
    collection: process.env.QDRANT_COLLECTION || "pdf_rag",
  },
  search: {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  },
  port: process.env.PORT || 5000,
};

// Single source of truth for the "no answer" reply: used in the system prompt,
// the /ask response and the /ask/stream token sequence.
const NOT_FOUND_MESSAGE =
  "❌ I could not find this information in the uploaded documents.";

// ─── Clients ──────────────────────────────────────────────────────────────────

const llm = new OpenAI({
  baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
  apiKey: CONFIG.azure.apiKey,
  defaultHeaders: { "api-key": CONFIG.azure.apiKey },
  defaultQuery: { "api-version": CONFIG.azure.apiVersion },
});

const qdrant = new QdrantClient({
  url: CONFIG.qdrant.url,
  checkCompatibility: false,
  timeout: 30000,
});

// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────

// Holds the in-flight or resolved load promise, so concurrent callers share a
// single model load instead of each starting their own.
let embedderPromise = null;

/**
 * Loads the feature-extraction pipeline and logs progress.
 * @returns {Promise<Function>} The embedding pipeline.
 */
async function loadEmbedder() {
  console.log("Loading MiniLM model...");
  const model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  console.log("Embedding model ready");
  return model;
}

/**
 * Returns the shared embedding pipeline, loading it on first use.
 * A failed load clears the cache so a later call can retry.
 * @returns {Promise<Function>} The embedding pipeline.
 */
function getEmbedder() {
  if (!embedderPromise) {
    embedderPromise = loadEmbedder().catch((err) => {
      embedderPromise = null;
      throw err;
    });
  }
  return embedderPromise;
}

/**
 * Embeds a piece of text with mean pooling and normalization.
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding as a plain array.
 */
async function createEmbedding(text) {
  const model = await getEmbedder();
  const out = await model(text, { pooling: "mean", normalize: true });
  return Array.from(out.data);
}

// ─── Qdrant search ────────────────────────────────────────────────────────────

/**
 * Searches the configured Qdrant collection for the nearest chunks.
 * @param {number[]} embedding - Query vector.
 * @param {{topK?: number, minScore?: number, maxContextDocs?: number}} [options]
 *   Overrides for CONFIG.search; omitted fields fall back to the configured values.
 * @returns {Promise<object[]>} Hits sorted by descending score, at most
 *   `maxContextDocs` long.
 */
async function searchQdrant(embedding, options = CONFIG.search) {
  const { topK, minScore, maxContextDocs } = { ...CONFIG.search, ...options };

  const results = await qdrant.search(CONFIG.qdrant.collection, {
    vector: embedding,
    limit: topK,
    with_payload: true,
    score_threshold: minScore,
  });

  return results
    .sort((a, b) => b.score - a.score)
    .slice(0, maxContextDocs);
}

// ─── Build LLM context string ─────────────────────────────────────────────────

/**
 * Renders search hits as numbered, separator-delimited context blocks.
 * @param {object[]} results - Search hits with an optional `payload`.
 * @returns {string} Context text for the LLM prompt.
 */
function buildContext(results) {
  return results
    .map(
      (item, i) =>
        `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
    )
    .join("\n\n---\n\n");
}

/**
 * Maps search hits to the source descriptors returned to clients.
 * @param {object[]} results - Search hits.
 * @returns {{score: number, file: *, page: *, chunk: *}[]} Source descriptors
 *   with the score rounded to 4 decimals.
 */
function toSources(results) {
  return results.map((r) => ({
    score: Number(r.score.toFixed(4)),
    file: r.payload?.file,
    page: r.payload?.page,
    chunk: r.payload?.chunk,
  }));
}

// ─── LLM call ─────────────────────────────────────────────────────────────────

const SYSTEM_PROMPT = `
You are CPM AI Assistant.

Rules:
- Answer only from the provided information.
- If the answer is not available, reply exactly:
  "${NOT_FOUND_MESSAGE}"
- Do not make up information.
- Do not mention documents, context, or chunks.
- Reply in the same language and style as the user's question.
- If the user asks in Hindi, answer in Hindi.
- If the user asks in Hinglish, answer in Hinglish.
- If the user asks in English, answer in English.

Response Style:
- Use simple and easy-to-understand language.
- Keep answers short and clear.
- Use headings and bullet points when helpful.
- Highlight important words in **bold**.

Format:
# 📋 Topic
## 🎯 Summary
Short answer in the user's language.
## ✅ Details
- Point 1
- Point 2
- Point 3
## ⚠️ Notes
- Extra information (if available).
`.trim();

/**
 * Builds the chat-completion request shared by the JSON and streaming routes.
 * @param {string} question - The user's question.
 * @param {string} context - Context text produced by buildContext.
 * @returns {object} Request parameters (without `stream`).
 */
function buildChatRequest(question, context) {
  return {
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
    ],
  };
}

/**
 * Asks the LLM to answer a question from the given context.
 * @param {string} question - The user's question.
 * @param {string} context - Context text produced by buildContext.
 * @returns {Promise<string>} The answer text.
 * @throws {Error} If the completion carries no text content.
 */
async function askLLM(question, context) {
  const completion = await llm.chat.completions.create(
    buildChatRequest(question, context)
  );

  const content = completion.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    // Fail loudly instead of returning `answer: null` with success: true.
    throw new Error("LLM returned no content");
  }
  return content;
}

// ─── Request helpers ──────────────────────────────────────────────────────────

/**
 * Extracts the question from a request body.
 * @param {*} body - Parsed request body (may be undefined).
 * @returns {string|null} The question exactly as sent, or null when it is
 *   missing, not a string, or blank.
 */
function readQuestion(body) {
  const { question } = body ?? {};
  // The type check matters: calling .trim() on a number/object would throw
  // before any try/catch and leave the request unanswered.
  if (typeof question !== "string" || !question.trim()) {
    return null;
  }
  return question;
}

/**
 * Splits buffered stream text into complete tokens plus a trailing remainder.
 * Whitespace runs are kept as their own tokens so markdown newlines survive.
 * @param {string} buffer - Text accumulated so far.
 * @returns {{tokens: string[], rest: string}} Complete tokens and the possibly
 *   incomplete last word to keep buffering.
 */
function drainCompleteTokens(buffer) {
  const parts = buffer.split(/(\s+)/);
  const rest = parts.pop() ?? "";
  return { tokens: parts.filter(Boolean), rest };
}

// ─── Express app ──────────────────────────────────────────────────────────────

const app = express();

app.use(cors());
app.use(express.json({ limit: "1mb" }));
app.use(express.urlencoded({ extended: true }));

app.use((req, _res, next) => {
  console.log(`→ ${req.method} ${req.path}`);
  next();
});

// ─── Routes ───────────────────────────────────────────────────────────────────

app.get("/health", (_req, res) => {
  res.json({
    status: "ok",
    model: CONFIG.azure.deployment,
    collection: CONFIG.qdrant.collection,
  });
});

app.post("/ask", async (req, res) => {
  const question = readQuestion(req.body);
  if (question === null) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();

  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      return res.json({
        success: true,
        question,
        answer: NOT_FOUND_MESSAGE,
        sources: [],
        ms: Date.now() - t0,
      });
    }

    const context = buildContext(results);
    const answer = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: toSources(results),
      ms: Date.now() - t0,
    });
  } catch (err) {
    console.error("❌ /ask error:", err);
    return res.status(500).json({ success: false, error: err.message });
  }
});

app.post("/ask/stream", async (req, res) => {
  const question = readQuestion(req.body);
  if (question === null) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders();

  // Track disconnects so we stop doing work and writing for a client that left.
  let clientGone = false;
  res.on("close", () => {
    clientGone = true;
  });

  const send = (event, data) => {
    if (clientGone) return;
    res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
  };

  try {
    send("status", { message: "🔍 Searching documents..." });

    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      // Emit the fixed reply one word per token event, as the client expects.
      for (const word of NOT_FOUND_MESSAGE.split(" ")) {
        send("token", { token: word, isWord: true });
      }
      send("done", { sources: [] });
      return res.end();
    }

    const sources = toSources(results);
    send("sources", { sources });
    send("status", { message: "💬 Generating answer..." });

    // Skip the LLM call entirely when nobody is listening any more.
    if (clientGone) {
      return res.end();
    }

    const context = buildContext(results);
    const stream = await llm.chat.completions.create({
      ...buildChatRequest(question, context),
      stream: true,
    });

    // Buffer to handle tokens that may be split mid-word.
    let wordBuffer = "";

    for await (const chunk of stream) {
      // Leaving the loop ends iteration of the SDK stream, so we stop
      // consuming tokens nobody will read.
      if (clientGone) break;

      const rawToken = chunk.choices[0]?.delta?.content ?? "";
      if (!rawToken) continue;

      const { tokens, rest } = drainCompleteTokens(wordBuffer + rawToken);
      wordBuffer = rest;

      for (const token of tokens) {
        send("token", { token, isWord: /\S/.test(token) });
      }
    }

    // Flush any remaining buffered text.
    if (wordBuffer) {
      send("token", { token: wordBuffer, isWord: true });
    }

    send("done", { sources });
  } catch (err) {
    console.error("❌ /ask/stream error:", err);
    send("error", { error: err.message });
  }

  res.end();
});

app.use((_req, res) => res.status(404).json({ success: false, error: "Not found" }));

// ─── Start ────────────────────────────────────────────────────────────────────

/**
 * Warms up the embedding model, then starts the HTTP server.
 * @returns {Promise<void>}
 */
async function start() {
  await getEmbedder();
  app.listen(CONFIG.port, () => {
    console.log(`Server running on port ${CONFIG.port}`);
  });
}

start().catch((err) => {
  console.error("Fatal startup error:", err);
  process.exit(1);
});