|
|
|
@@ -1,654 +1,3 @@
|
|
|
|
|
// //
|
|
|
|
|
// require("dotenv").config();
|
|
|
|
|
|
|
|
|
|
// const express = require("express");
|
|
|
|
|
// const cors = require("cors");
|
|
|
|
|
// const OpenAI = require("openai");
|
|
|
|
|
|
|
|
|
|
// const {
|
|
|
|
|
// pipeline,
|
|
|
|
|
// } = require("@xenova/transformers");
|
|
|
|
|
|
|
|
|
|
// const {
|
|
|
|
|
// QdrantClient,
|
|
|
|
|
// } = require("@qdrant/js-client-rest");
|
|
|
|
|
|
|
|
|
|
// const app = express();
|
|
|
|
|
|
|
|
|
|
// app.use(cors());
|
|
|
|
|
// app.use(express.json());
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // AZURE OPENAI
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// const azureEndpoint =
|
|
|
|
|
// "https://cpmindiayoda-resource.services.ai.azure.com";
|
|
|
|
|
|
|
|
|
|
// const deploymentName = "gpt-4o-mini";
|
|
|
|
|
|
|
|
|
|
// const apiVersion =
|
|
|
|
|
// "2024-08-01-preview";
|
|
|
|
|
|
|
|
|
|
// const llm = new OpenAI({
|
|
|
|
|
// baseURL:
|
|
|
|
|
// `${azureEndpoint}/openai/deployments/${deploymentName}`,
|
|
|
|
|
|
|
|
|
|
// apiKey:
|
|
|
|
|
// process.env.AZURE_OPENAI_KEY,
|
|
|
|
|
|
|
|
|
|
// defaultHeaders: {
|
|
|
|
|
// "api-key":
|
|
|
|
|
// process.env.AZURE_OPENAI_KEY,
|
|
|
|
|
// },
|
|
|
|
|
|
|
|
|
|
// defaultQuery: {
|
|
|
|
|
// "api-version":
|
|
|
|
|
// apiVersion,
|
|
|
|
|
// },
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // QDRANT
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// const qdrant = new QdrantClient({
|
|
|
|
|
// url: "http://20.40.61.65:6333",
|
|
|
|
|
// checkCompatibility: false,
|
|
|
|
|
// timeout: 30000,
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// const COLLECTION_NAME =
|
|
|
|
|
// "pdf_rag";
|
|
|
|
|
|
|
|
|
|
// let embedder;
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // LOAD EMBEDDING MODEL
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// async function loadModel() {
|
|
|
|
|
// console.log(
|
|
|
|
|
// "Loading MiniLM model..."
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// embedder = await pipeline(
|
|
|
|
|
// "feature-extraction",
|
|
|
|
|
// "Xenova/all-MiniLM-L6-v2"
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// console.log(
|
|
|
|
|
// "Embedding model loaded"
|
|
|
|
|
// );
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // EMBEDDING
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// async function createEmbedding(
|
|
|
|
|
// text
|
|
|
|
|
// ) {
|
|
|
|
|
// const output =
|
|
|
|
|
// await embedder(text, {
|
|
|
|
|
// pooling: "mean",
|
|
|
|
|
// normalize: true,
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// return Array.from(output.data);
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // HEALTH
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// app.get("/", (req, res) => {
|
|
|
|
|
// res.json({
|
|
|
|
|
// success: true,
|
|
|
|
|
// message:
|
|
|
|
|
// "Qdrant + Azure GPT RAG Running",
|
|
|
|
|
// });
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // ASK API
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// app.post(
|
|
|
|
|
// "/ask",
|
|
|
|
|
// async (req, res) => {
|
|
|
|
|
// try {
|
|
|
|
|
// const { question } =
|
|
|
|
|
// req.body;
|
|
|
|
|
|
|
|
|
|
// if (!question) {
|
|
|
|
|
// return res
|
|
|
|
|
// .status(400)
|
|
|
|
|
// .json({
|
|
|
|
|
// success: false,
|
|
|
|
|
// error:
|
|
|
|
|
// "Question is required",
|
|
|
|
|
// });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// console.log(
|
|
|
|
|
// "Question:",
|
|
|
|
|
// question
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // CREATE EMBEDDING
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// const embedding =
|
|
|
|
|
// await createEmbedding(
|
|
|
|
|
// question
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // SEARCH QDRANT
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// const searchResult =
|
|
|
|
|
// await qdrant.search(
|
|
|
|
|
// COLLECTION_NAME,
|
|
|
|
|
// {
|
|
|
|
|
// vector: embedding,
|
|
|
|
|
// limit: 20,
|
|
|
|
|
// }
|
|
|
|
|
// );
|
|
|
|
|
// const filteredResults = searchResult.filter(
|
|
|
|
|
// item => item.score >= 0.10
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// console.log(
|
|
|
|
|
// "Results:",
|
|
|
|
|
// filteredResults.length,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// if (
|
|
|
|
|
// !filteredResults.length
|
|
|
|
|
// ) {
|
|
|
|
|
// return res.json({
|
|
|
|
|
// success: true,
|
|
|
|
|
// answer:
|
|
|
|
|
// "No relevant information found.",
|
|
|
|
|
// sources: [],
|
|
|
|
|
// });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // CONTEXT
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// const context =
|
|
|
|
|
// filteredResults
|
|
|
|
|
// .map(
|
|
|
|
|
// (item, index) => `
|
|
|
|
|
// Result ${index + 1}
|
|
|
|
|
|
|
|
|
|
// File:
|
|
|
|
|
// ${item.payload?.file || ""}
|
|
|
|
|
|
|
|
|
|
// Content:
|
|
|
|
|
// ${item.payload?.text || ""}
|
|
|
|
|
// `
|
|
|
|
|
// )
|
|
|
|
|
// .join("\n\n");
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // GPT CALL
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// const completion =
|
|
|
|
|
// await llm.chat.completions.create(
|
|
|
|
|
// {
|
|
|
|
|
// model:
|
|
|
|
|
// deploymentName,
|
|
|
|
|
|
|
|
|
|
// temperature: 0,
|
|
|
|
|
|
|
|
|
|
// messages: [
|
|
|
|
|
// {
|
|
|
|
|
// role: "system",
|
|
|
|
|
// content: `
|
|
|
|
|
// You are CPM AI Assistant.
|
|
|
|
|
|
|
|
|
|
// Rules:
|
|
|
|
|
// - Answer ONLY from the provided context.
|
|
|
|
|
// - If information is not found, say:
|
|
|
|
|
// "❌ I could not find this information in the uploaded documents."
|
|
|
|
|
|
|
|
|
|
// Response Style:
|
|
|
|
|
// - Use emojis where appropriate.
|
|
|
|
|
// - Use markdown formatting.
|
|
|
|
|
// - Use headings.
|
|
|
|
|
// - Use bullet points.
|
|
|
|
|
// - Make answers professional and easy to read.
|
|
|
|
|
// - Highlight important information using **bold** text.
|
|
|
|
|
// - Never mention the context or document chunks.
|
|
|
|
|
|
|
|
|
|
// Example Format:
|
|
|
|
|
|
|
|
|
|
// # 📋 Dress Code Policy
|
|
|
|
|
|
|
|
|
|
// ## 🎯 Overview
|
|
|
|
|
// Brief summary here.
|
|
|
|
|
|
|
|
|
|
// ## ✅ Key Points
|
|
|
|
|
// • Point 1
|
|
|
|
|
// • Point 2
|
|
|
|
|
// • Point 3
|
|
|
|
|
|
|
|
|
|
// ## ⚠️ Important Notes
|
|
|
|
|
// • Note 1
|
|
|
|
|
// • Note 2
|
|
|
|
|
|
|
|
|
|
// ## 📝 Conclusion
|
|
|
|
|
// Short conclusion.
|
|
|
|
|
// `,
|
|
|
|
|
// },
|
|
|
|
|
// {
|
|
|
|
|
// role: "user",
|
|
|
|
|
// content: `
|
|
|
|
|
// Context:
|
|
|
|
|
// ${context}
|
|
|
|
|
|
|
|
|
|
// Question:
|
|
|
|
|
// ${question}
|
|
|
|
|
// `,
|
|
|
|
|
// },
|
|
|
|
|
// ]
|
|
|
|
|
// }
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// const answer =
|
|
|
|
|
// completion.choices[0]
|
|
|
|
|
// .message.content;
|
|
|
|
|
|
|
|
|
|
// return res.json({
|
|
|
|
|
// success: true,
|
|
|
|
|
|
|
|
|
|
// question,
|
|
|
|
|
|
|
|
|
|
// answer,
|
|
|
|
|
|
|
|
|
|
// sources:
|
|
|
|
|
// filteredResults.map(
|
|
|
|
|
// (item) => ({
|
|
|
|
|
// score:
|
|
|
|
|
// item.score,
|
|
|
|
|
|
|
|
|
|
// file:
|
|
|
|
|
// item.payload
|
|
|
|
|
// ?.file,
|
|
|
|
|
|
|
|
|
|
// chunk:
|
|
|
|
|
// item.payload
|
|
|
|
|
// ?.chunk,
|
|
|
|
|
// })
|
|
|
|
|
// ),
|
|
|
|
|
// });
|
|
|
|
|
// } catch (error) {
|
|
|
|
|
// console.error(
|
|
|
|
|
// "ERROR:",
|
|
|
|
|
// error
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// return res
|
|
|
|
|
// .status(500)
|
|
|
|
|
// .json({
|
|
|
|
|
// success: false,
|
|
|
|
|
// error:
|
|
|
|
|
// error.message,
|
|
|
|
|
// });
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
// );
|
|
|
|
|
|
|
|
|
|
// // ======================
|
|
|
|
|
// // START SERVER
|
|
|
|
|
// // ======================
|
|
|
|
|
|
|
|
|
|
// async function startServer() {
|
|
|
|
|
// try {
|
|
|
|
|
// await loadModel();
|
|
|
|
|
|
|
|
|
|
// app.listen(
|
|
|
|
|
// process.env.PORT ||
|
|
|
|
|
// 5000,
|
|
|
|
|
// () => {
|
|
|
|
|
// console.log(
|
|
|
|
|
// "Server running on port",
|
|
|
|
|
// process.env.PORT ||
|
|
|
|
|
// 5000
|
|
|
|
|
// );
|
|
|
|
|
// }
|
|
|
|
|
// );
|
|
|
|
|
// } catch (error) {
|
|
|
|
|
// console.error(
|
|
|
|
|
// "Startup Error:",
|
|
|
|
|
// error
|
|
|
|
|
// );
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// startServer();
|
|
|
|
|
|
|
|
|
|
// require("dotenv").config();
|
|
|
|
|
|
|
|
|
|
// const express = require("express");
|
|
|
|
|
// const cors = require("cors");
|
|
|
|
|
// const OpenAI = require("openai");
|
|
|
|
|
// const { pipeline } = require("@xenova/transformers");
|
|
|
|
|
// const { QdrantClient } = require("@qdrant/js-client-rest");
|
|
|
|
|
|
|
|
|
|
// // ─── Config ───────────────────────────────────────────────────────────────────
|
|
|
|
|
// const CONFIG = {
|
|
|
|
|
// azure: {
|
|
|
|
|
// endpoint: process.env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com",
|
|
|
|
|
// deployment: process.env.AZURE_DEPLOYMENT || "gpt-4o-mini",
|
|
|
|
|
// apiVersion: process.env.AZURE_API_VERSION || "2024-08-01-preview",
|
|
|
|
|
// apiKey: process.env.AZURE_OPENAI_KEY,
|
|
|
|
|
// },
|
|
|
|
|
// qdrant: {
|
|
|
|
|
// url: process.env.QDRANT_URL || "http://20.40.61.65:6333",
|
|
|
|
|
// collection: process.env.QDRANT_COLLECTION || "pdf_rag",
|
|
|
|
|
// },
|
|
|
|
|
// search: {
|
|
|
|
|
// topK: 20,
|
|
|
|
|
// minScore: 0.10,
|
|
|
|
|
// maxContextDocs: 10,
|
|
|
|
|
// },
|
|
|
|
|
// port: process.env.PORT || 5000,
|
|
|
|
|
// };
|
|
|
|
|
|
|
|
|
|
// // ─── Clients ──────────────────────────────────────────────────────────────────
|
|
|
|
|
// const llm = new OpenAI({
|
|
|
|
|
// baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
|
|
|
|
|
// apiKey: CONFIG.azure.apiKey,
|
|
|
|
|
// defaultHeaders: { "api-key": CONFIG.azure.apiKey },
|
|
|
|
|
// defaultQuery: { "api-version": CONFIG.azure.apiVersion },
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// const qdrant = new QdrantClient({
|
|
|
|
|
// url: CONFIG.qdrant.url,
|
|
|
|
|
// checkCompatibility: false,
|
|
|
|
|
// timeout: 30000,
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// // ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
|
|
|
|
|
// let _embedder = null;
|
|
|
|
|
// async function getEmbedder() {
|
|
|
|
|
// if (!_embedder) {
|
|
|
|
|
// console.log("⏳ Loading MiniLM model...");
|
|
|
|
|
// _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
|
|
|
|
|
// console.log("✅ Embedding model ready");
|
|
|
|
|
// }
|
|
|
|
|
// return _embedder;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// async function createEmbedding(text) {
|
|
|
|
|
// const model = await getEmbedder();
|
|
|
|
|
// const out = await model(text, { pooling: "mean", normalize: true });
|
|
|
|
|
// return Array.from(out.data);
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ─── Qdrant search ────────────────────────────────────────────────────────────
|
|
|
|
|
// async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
|
|
|
|
|
// const results = await qdrant.search(CONFIG.qdrant.collection, {
|
|
|
|
|
// vector: embedding,
|
|
|
|
|
// limit: topK,
|
|
|
|
|
// with_payload: true,
|
|
|
|
|
// score_threshold: minScore, // let Qdrant filter — faster than client-side
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// // Re-rank by score, cap to maxContextDocs
|
|
|
|
|
// return results
|
|
|
|
|
// .sort((a, b) => b.score - a.score)
|
|
|
|
|
// .slice(0, maxContextDocs);
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ─── Build LLM context string ─────────────────────────────────────────────────
|
|
|
|
|
// function buildContext(results) {
|
|
|
|
|
// return results
|
|
|
|
|
// .map((item, i) =>
|
|
|
|
|
// `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
|
|
|
|
|
// )
|
|
|
|
|
// .join("\n\n---\n\n");
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ─── LLM call ─────────────────────────────────────────────────────────────────
|
|
|
|
|
// const SYSTEM_PROMPT = `
|
|
|
|
|
// You are CPM AI Assistant.
|
|
|
|
|
|
|
|
|
|
// Rules:
|
|
|
|
|
// - Answer only from the provided information.
|
|
|
|
|
// - If the answer is not available, reply exactly:
|
|
|
|
|
// "❌ I could not find this information in the uploaded documents."
|
|
|
|
|
// - Do not make up information.
|
|
|
|
|
// - Do not mention documents, context, or chunks.
|
|
|
|
|
|
|
|
|
|
// Response Style:
|
|
|
|
|
// - Use simple English.
|
|
|
|
|
// - Keep answers short and clear.
|
|
|
|
|
// - Use headings and bullet points.
|
|
|
|
|
// - Highlight important words in **bold**.
|
|
|
|
|
// - Use emojis in headings.
|
|
|
|
|
|
|
|
|
|
// Format:
|
|
|
|
|
|
|
|
|
|
// # 📋 Topic
|
|
|
|
|
|
|
|
|
|
// ## 🎯 Summary
|
|
|
|
|
// Short answer in 1-2 sentences.
|
|
|
|
|
|
|
|
|
|
// ## ✅ Details
|
|
|
|
|
// - Point 1
|
|
|
|
|
// - Point 2
|
|
|
|
|
// - Point 3
|
|
|
|
|
|
|
|
|
|
// ## ⚠️ Notes
|
|
|
|
|
// - Extra information (if available).
|
|
|
|
|
// `.trim();
|
|
|
|
|
|
|
|
|
|
// async function askLLM(question, context) {
|
|
|
|
|
// const completion = await llm.chat.completions.create({
|
|
|
|
|
// model: CONFIG.azure.deployment,
|
|
|
|
|
// temperature: 0,
|
|
|
|
|
// max_tokens: 1500,
|
|
|
|
|
// messages: [
|
|
|
|
|
// { role: "system", content: SYSTEM_PROMPT },
|
|
|
|
|
// { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
|
|
|
|
|
// ],
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// return completion.choices[0].message.content;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ─── Express app ──────────────────────────────────────────────────────────────
|
|
|
|
|
// const app = express();
|
|
|
|
|
// app.use(cors());
|
|
|
|
|
// app.use(express.json({ limit: "1mb" }));
|
|
|
|
|
|
|
|
|
|
// // Request logger middleware
|
|
|
|
|
// app.use((req, _res, next) => {
|
|
|
|
|
// console.log(`→ ${req.method} ${req.path}`);
|
|
|
|
|
// next();
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// // ─── Routes ───────────────────────────────────────────────────────────────────
|
|
|
|
|
// app.get("/health", (_req, res) => {
|
|
|
|
|
// res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection });
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// app.post("/ask", async (req, res) => {
|
|
|
|
|
// const { question } = req.body ?? {};
|
|
|
|
|
|
|
|
|
|
// if (!question?.trim()) {
|
|
|
|
|
// return res.status(400).json({ success: false, error: "question is required" });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// const t0 = Date.now();
|
|
|
|
|
|
|
|
|
|
// try {
|
|
|
|
|
// // 1. Embed question
|
|
|
|
|
// const embedding = await createEmbedding(question.trim());
|
|
|
|
|
|
|
|
|
|
// // 2. Semantic search
|
|
|
|
|
// const results = await searchQdrant(embedding);
|
|
|
|
|
|
|
|
|
|
// if (!results.length) {
|
|
|
|
|
// return res.json({
|
|
|
|
|
// success: true,
|
|
|
|
|
// question,
|
|
|
|
|
// answer: "❌ I could not find this information in the uploaded documents.",
|
|
|
|
|
// sources: [],
|
|
|
|
|
// ms: Date.now() - t0,
|
|
|
|
|
// });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // 3. Build context + call LLM
|
|
|
|
|
// const context = buildContext(results);
|
|
|
|
|
// const answer = await askLLM(question, context);
|
|
|
|
|
|
|
|
|
|
// return res.json({
|
|
|
|
|
// success: true,
|
|
|
|
|
// question,
|
|
|
|
|
// answer,
|
|
|
|
|
// sources: results.map(r => ({
|
|
|
|
|
// score: +r.score.toFixed(4),
|
|
|
|
|
// file: r.payload?.file,
|
|
|
|
|
// page: r.payload?.page,
|
|
|
|
|
// chunk: r.payload?.chunk,
|
|
|
|
|
// })),
|
|
|
|
|
// ms: Date.now() - t0,
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// } catch (err) {
|
|
|
|
|
// console.error("❌ /ask error:", err);
|
|
|
|
|
// return res.status(500).json({ success: false, error: err.message });
|
|
|
|
|
// }
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// app.post("/ask/stream", async (req, res) => {
|
|
|
|
|
// const { question } = req.body ?? {};
|
|
|
|
|
|
|
|
|
|
// if (!question?.trim()) {
|
|
|
|
|
// return res.status(400).json({ success: false, error: "question is required" });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // ── SSE headers ────────────────────────────────────────────────────────────
|
|
|
|
|
// res.setHeader("Content-Type", "text/event-stream");
|
|
|
|
|
// res.setHeader("Cache-Control", "no-cache");
|
|
|
|
|
// res.setHeader("Connection", "keep-alive");
|
|
|
|
|
// res.flushHeaders(); // send headers immediately
|
|
|
|
|
|
|
|
|
|
// const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
|
|
|
|
|
|
|
|
|
|
// try {
|
|
|
|
|
// // 1. Embed
|
|
|
|
|
// send("status", { message: "🔍 Searching documents..." });
|
|
|
|
|
// const embedding = await createEmbedding(question.trim());
|
|
|
|
|
|
|
|
|
|
// // 2. Search Qdrant
|
|
|
|
|
// const results = await searchQdrant(embedding);
|
|
|
|
|
|
|
|
|
|
// if (!results.length) {
|
|
|
|
|
// send("token", { token: "❌ I could not find this information in the uploaded documents." });
|
|
|
|
|
// send("done", { sources: [] });
|
|
|
|
|
// return res.end();
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// // 3. Send sources early so UI can show them while streaming answer
|
|
|
|
|
// const sources = results.map(r => ({
|
|
|
|
|
// score: +r.score.toFixed(4),
|
|
|
|
|
// file: r.payload?.file,
|
|
|
|
|
// page: r.payload?.page,
|
|
|
|
|
// chunk: r.payload?.chunk,
|
|
|
|
|
// }));
|
|
|
|
|
// send("sources", { sources });
|
|
|
|
|
|
|
|
|
|
// // 4. Stream LLM tokens
|
|
|
|
|
// send("status", { message: "💬 Generating answer..." });
|
|
|
|
|
|
|
|
|
|
// const context = buildContext(results);
|
|
|
|
|
// const stream = await llm.chat.completions.create({
|
|
|
|
|
// model: CONFIG.azure.deployment,
|
|
|
|
|
// temperature: 0,
|
|
|
|
|
// max_tokens: 1500,
|
|
|
|
|
// stream: true, // ← key change
|
|
|
|
|
// messages: [
|
|
|
|
|
// { role: "system", content: SYSTEM_PROMPT },
|
|
|
|
|
// { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
|
|
|
|
|
// ],
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
// for await (const chunk of stream) {
|
|
|
|
|
// const token = chunk.choices[0]?.delta?.content ?? "";
|
|
|
|
|
// if (token) send("token", { token });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// send("done", { sources });
|
|
|
|
|
|
|
|
|
|
// } catch (err) {
|
|
|
|
|
// console.error("❌ /ask/stream error:", err);
|
|
|
|
|
// send("error", { error: err.message });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// res.end();
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// app.use((_req, res) => res.status(404).json({ success: false, error: "Not found" }));
|
|
|
|
|
|
|
|
|
|
// // ─── Start ────────────────────────────────────────────────────────────────────
|
|
|
|
|
// async function start() {
|
|
|
|
|
// await getEmbedder();
|
|
|
|
|
|
|
|
|
|
// app.listen(CONFIG.port, () => {
|
|
|
|
|
// console.log(`Server running on port ${CONFIG.port}`);
|
|
|
|
|
// });
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// start().catch(err => {
|
|
|
|
|
// console.error("Fatal startup error:", err);
|
|
|
|
|
// process.exit(1);
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
require("dotenv").config();
|
|
|
|
|
|
|
|
|
|
const express = require("express");
|
|
|
|
@@ -657,8 +6,6 @@ const OpenAI = require("openai");
|
|
|
|
|
const { pipeline } = require("@xenova/transformers");
|
|
|
|
|
const { QdrantClient } = require("@qdrant/js-client-rest");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ─── Config ───────────────────────────────────────────────────────────────────
|
|
|
|
|
const CONFIG = {
|
|
|
|
|
azure: {
|
|
|
|
|
endpoint: process.env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com",
|
|
|
|
@@ -696,9 +43,9 @@ const qdrant = new QdrantClient({
|
|
|
|
|
// Resolved embedding pipeline; stays null until the first load succeeds.
let _embedder = null;
// In-flight load promise, shared by concurrent callers so the model is
// loaded once. Cleared when the load settles.
let _embedderLoading = null;

/**
 * Return the shared feature-extraction pipeline, loading it on first use.
 *
 * The pipeline is created by calling `pipeline("feature-extraction",
 * "Xenova/all-MiniLM-L6-v2")` and is cached in `_embedder`. Callers that
 * arrive while a load is still in progress wait on the same promise instead
 * of starting a second load. If a load fails, the in-flight promise is
 * discarded, so the next call tries again.
 *
 * @returns {Promise<*>} Resolves to whatever `pipeline(...)` resolved to.
 * @throws Rejects with the error raised by `pipeline(...)` when loading fails.
 */
async function getEmbedder() {
  if (_embedder) {
    return _embedder;
  }

  if (!_embedderLoading) {
    _embedderLoading = (async () => {
      console.log("Loading MiniLM model...");
      const model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
      _embedder = model;
      console.log("Embedding model ready");
      return model;
    })().finally(() => {
      // Runs asynchronously, after the assignment above, on success and on
      // failure. On failure this is what lets a later call retry.
      _embedderLoading = null;
    });
  }

  return _embedderLoading;
}
|
|
|
|
@@ -841,10 +188,6 @@ app.post("/ask", async (req, res) => {
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// ─── /ask/stream — word-by-word SSE ──────────────────────────────────────────
|
|
|
|
|
// The LLM streams tokens (which may be partial words or multi-word chunks).
|
|
|
|
|
// We split every incoming token on whitespace and emit each word as a separate
|
|
|
|
|
// SSE "token" event so the frontend can animate them one-by-one.
|
|
|
|
|
app.post("/ask/stream", async (req, res) => {
|
|
|
|
|
const { question } = req.body ?? {};
|
|
|
|
|
|
|
|
|
|