// RAG question-answering API: MiniLM embeddings → Qdrant vector search →
// Azure OpenAI chat completion. Earlier iterations of this server that used to
// live here as commented-out code have been removed; they remain available in
// version control.

require("dotenv").config();

const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");

// ─── Config ───────────────────────────────────────────────────────────────────

/**
 * Runtime configuration, resolved once at module load.
 *
 * Every environment-backed value uses `||` (not `??`) on purpose: an
 * environment variable that is set but empty should fall back to the default
 * just like one that is missing.
 *
 * - `azure`  — endpoint, deployment name, API version and key used to build the
 *              chat-completions client. `apiKey` has no default.
 * - `qdrant` — base URL of the Qdrant instance and the collection searched.
 * - `search` — `topK` is the number of hits requested from Qdrant, `minScore`
 *              is passed as the score threshold, and `maxContextDocs` caps how
 *              many hits are placed in the LLM context.
 * - `port`   — port the HTTP server listens on.
 */
const CONFIG = {
  azure: {
    endpoint:
      process.env.AZURE_OPENAI_ENDPOINT ||
      "https://cpmindiayoda-resource.services.ai.azure.com",
    deployment: process.env.AZURE_DEPLOYMENT || "gpt-4o-mini",
    apiVersion: process.env.AZURE_API_VERSION || "2024-08-01-preview",
    apiKey: process.env.AZURE_OPENAI_KEY,
  },
  qdrant: {
    url: process.env.QDRANT_URL || "http://20.40.61.65:6333",
    collection: process.env.QDRANT_COLLECTION || "pdf_rag",
  },
  search: {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  },
  port: process.env.PORT || 5000,
};
// ─── Clients ──────────────────────────────────────────────────────────────────

// The generic OpenAI SDK is pointed at an Azure deployment: the deployment name
// is part of the base URL, and the key / API version are supplied through the
// client's default headers and default query string.
const llm = new OpenAI({
  baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
  apiKey: CONFIG.azure.apiKey,
  defaultHeaders: { "api-key": CONFIG.azure.apiKey },
  defaultQuery: { "api-version": CONFIG.azure.apiVersion },
});

const qdrant = new QdrantClient({
  url: CONFIG.qdrant.url,
  checkCompatibility: false,
  timeout: 30000,
});

// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────

// Holds the in-flight or settled load promise so that concurrent first callers
// share one model load instead of each starting their own.
let _embedder = null;

/**
 * Loads the MiniLM feature-extraction pipeline.
 *
 * @returns {Promise<Function>} The embedding pipeline.
 */
async function loadEmbedder() {
  console.log("⏳ Loading MiniLM model...");
  const model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  console.log("✅ Embedding model ready");
  return model;
}

/**
 * Returns the shared embedding pipeline, loading it on first use.
 *
 * The load promise is cached rather than the resolved model, so overlapping
 * calls made before the load finishes do not trigger duplicate loads. A failed
 * load clears the cache so that a later call can retry.
 *
 * @returns {Promise<Function>} The embedding pipeline.
 */
async function getEmbedder() {
  if (!_embedder) {
    _embedder = loadEmbedder().catch((err) => {
      _embedder = null;
      throw err;
    });
  }
  return _embedder;
}

/**
 * Embeds a piece of text with mean pooling and normalization enabled.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding as a plain array.
 */
async function createEmbedding(text) {
  const model = await getEmbedder();
  const out = await model(text, { pooling: "mean", normalize: true });
  return Array.from(out.data);
}

// ─── Qdrant search ────────────────────────────────────────────────────────────

/**
 * Runs a vector search against the configured collection.
 *
 * Each option falls back to its `CONFIG.search` value individually, so callers
 * may override just one of them.
 *
 * @param {number[]} embedding - Query vector.
 * @param {object} [options]
 * @param {number} [options.topK] - Number of hits requested from Qdrant.
 * @param {number} [options.minScore] - Score threshold sent to Qdrant.
 * @param {number} [options.maxContextDocs] - Maximum number of hits returned.
 * @returns {Promise<object[]>} Hits ordered by descending score, capped at
 *   `maxContextDocs`.
 */
async function searchQdrant(
  embedding,
  {
    topK = CONFIG.search.topK,
    minScore = CONFIG.search.minScore,
    maxContextDocs = CONFIG.search.maxContextDocs,
  } = {}
) {
  const results = await qdrant.search(CONFIG.qdrant.collection, {
    vector: embedding,
    limit: topK,
    with_payload: true,
    score_threshold: minScore,
  });

  return results
    .sort((a, b) => b.score - a.score)
    .slice(0, maxContextDocs);
}

// ─── Build LLM context string ─────────────────────────────────────────────────

/**
 * Formats search hits into the context text sent to the LLM.
 *
 * Each hit becomes a numbered entry with its file, page and text; missing
 * payload fields fall back to "unknown", "?" and "" respectively.
 *
 * @param {object[]} results - Search hits.
 * @returns {string} Entries joined by a `---` separator.
 */
function buildContext(results) {
  return results
    .map((item, i) =>
      `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
    )
    .join("\n\n---\n\n");
}

/**
 * Projects search hits onto the `sources` shape returned to clients.
 *
 * @param {object[]} results - Search hits.
 * @returns {{score: number, file: *, page: *, chunk: *}[]} One entry per hit,
 *   with the score rounded to four decimals.
 */
function toSources(results) {
  return results.map((r) => ({
    score: +r.score.toFixed(4),
    file: r.payload?.file,
    page: r.payload?.page,
    chunk: r.payload?.chunk,
  }));
}

// ─── LLM call ─────────────────────────────────────────────────────────────────

// Single source of truth for the "nothing found" reply: used in the system
// prompt and returned directly when the search yields no hits.
const NOT_FOUND_MESSAGE =
  "❌ I could not find this information in the uploaded documents.";

const SYSTEM_PROMPT = `
You are CPM AI Assistant.

Rules:
- Answer only from the provided information.
- If the answer is not available, reply exactly:
  "${NOT_FOUND_MESSAGE}"
- Do not make up information.
- Do not mention documents, context, or chunks.

Response Style:
- Use simple English.
- Keep answers short and clear.
- Use headings and bullet points.
- Highlight important words in **bold**.
- Use emojis in headings.

Format:

# 📋 Topic

## 🎯 Summary
Short answer in 1-2 sentences.

## ✅ Details
- Point 1
- Point 2
- Point 3

## ⚠️ Notes
- Extra information (if available).
`.trim();

/**
 * Builds the chat-completion request shared by the streaming and
 * non-streaming endpoints.
 *
 * @param {string} question - The user's question.
 * @param {string} context - Context text produced by `buildContext`.
 * @returns {object} Request parameters without the `stream` flag.
 */
function buildChatRequest(question, context) {
  return {
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
    ],
  };
}

/**
 * Asks the LLM to answer a question from the given context.
 *
 * @param {string} question - The user's question.
 * @param {string} context - Context text produced by `buildContext`.
 * @returns {Promise<string>} Content of the first completion choice.
 */
async function askLLM(question, context) {
  const completion = await llm.chat.completions.create(
    buildChatRequest(question, context)
  );
  return completion.choices[0].message.content;
}

// ─── Request / streaming helpers ──────────────────────────────────────────────

/**
 * Checks that a request's `question` is a non-blank string.
 *
 * The type check matters: calling `.trim()` on a non-string value (for example
 * a number or an object sent in the JSON body) throws, and that would happen
 * outside the route's try/catch.
 *
 * @param {*} question - Value taken from the request body.
 * @returns {boolean} True when the value is a string with non-whitespace text.
 */
function isValidQuestion(question) {
  return typeof question === "string" && question.trim().length > 0;
}

const WHITESPACE_RUN = /(\s+)/;

/**
 * Splits text into alternating word / whitespace pieces.
 *
 * The final piece is returned separately as `rest` because, while streaming,
 * it may be the beginning of a word that continues in the next chunk.
 *
 * @param {string} text - Buffered text to split.
 * @returns {{tokens: string[], rest: string}} Complete non-empty pieces and
 *   the trailing (possibly empty) remainder.
 */
function splitWords(text) {
  const parts = text.split(WHITESPACE_RUN);
  const rest = parts.pop() ?? "";
  return { tokens: parts.filter(Boolean), rest };
}

// ─── Express app ──────────────────────────────────────────────────────────────

const app = express();
app.use(cors());
app.use(express.json({ limit: "1mb" }));
app.use(express.urlencoded({ extended: true }));

// Request logger.
app.use((req, _res, next) => {
  console.log(`→ ${req.method} ${req.path}`);
  next();
});

// ─── Routes ───────────────────────────────────────────────────────────────────

app.get("/health", (_req, res) => {
  res.json({
    status: "ok",
    model: CONFIG.azure.deployment,
    collection: CONFIG.qdrant.collection,
  });
});

// POST /ask — embed the question, search Qdrant, and return the full answer
// together with its sources and the elapsed time in milliseconds.
app.post("/ask", async (req, res) => {
  const { question } = req.body ?? {};
  if (!isValidQuestion(question)) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();
  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      return res.json({
        success: true,
        question,
        answer: NOT_FOUND_MESSAGE,
        sources: [],
        ms: Date.now() - t0,
      });
    }

    const context = buildContext(results);
    const answer = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: toSources(results),
      ms: Date.now() - t0,
    });
  } catch (err) {
    console.error("❌ /ask error:", err);
    return res.status(500).json({ success: false, error: err.message });
  }
});

// ─── /ask/stream — word-by-word SSE ──────────────────────────────────────────
// The LLM streams tokens (which may be partial words or multi-word chunks).
// Incoming text is buffered and split on whitespace; each complete word and
// each whitespace run is emitted as its own SSE "token" event (`isWord` tells
// them apart), so a client can animate words one by one and still rebuild the
// exact text — newlines and markdown included — by concatenating tokens.
app.post("/ask/stream", async (req, res) => {
  const { question } = req.body ?? {};
  if (!isValidQuestion(question)) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders();

  // Cancel the upstream LLM request if the client goes away before we finish.
  const abort = new AbortController();
  res.on("close", () => {
    if (!res.writableEnded) abort.abort();
  });

  const send = (event, data) => {
    if (abort.signal.aborted || res.writableEnded) return;
    res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
  };
  const sendToken = (token) => send("token", { token, isWord: /\S/.test(token) });

  // Emits every complete piece of `text` and returns the unfinished remainder.
  const sendWords = (text) => {
    const { tokens, rest } = splitWords(text);
    for (const token of tokens) {
      sendToken(token);
    }
    return rest;
  };

  try {
    send("status", { message: "🔍 Searching documents..." });

    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      // Same tokenization as the LLM path, so the spaces between words are
      // emitted too and the concatenated tokens read as a normal sentence.
      const tail = sendWords(NOT_FOUND_MESSAGE);
      if (tail) sendToken(tail);
      send("done", { sources: [] });
      return;
    }

    // Sources go out first so the UI can show them while the answer streams.
    const sources = toSources(results);
    send("sources", { sources });
    send("status", { message: "💬 Generating answer..." });

    const context = buildContext(results);
    const stream = await llm.chat.completions.create(
      { ...buildChatRequest(question, context), stream: true },
      { signal: abort.signal }
    );

    // Carries a trailing partial word over to the next chunk.
    let wordBuffer = "";
    for await (const chunk of stream) {
      const rawToken = chunk.choices[0]?.delta?.content ?? "";
      if (!rawToken) continue;
      wordBuffer = sendWords(wordBuffer + rawToken);
    }

    // Flush whatever is left once the stream ends.
    if (wordBuffer) sendToken(wordBuffer);

    send("done", { sources });
  } catch (err) {
    if (abort.signal.aborted) {
      // Expected when the client disconnects mid-stream; nobody to notify.
      console.log("↩ /ask/stream client disconnected");
    } else {
      console.error("❌ /ask/stream error:", err);
      send("error", { error: err.message });
    }
  } finally {
    res.end();
  }
});

// Fallback for unmatched routes.
app.use((_req, res) => res.status(404).json({ success: false, error: "Not found" }));

// ─── Start ────────────────────────────────────────────────────────────────────

/**
 * Loads the embedding model, then starts listening, so the first request does
 * not pay the model-load cost.
 *
 * @returns {Promise<void>}
 */
async function start() {
  await getEmbedder();
  app.listen(CONFIG.port, () => {
    console.log(`Server running on port ${CONFIG.port}`);
  });
}

start().catch((err) => {
  console.error("Fatal startup error:", err);
  process.exit(1);
});