// gyanBuddy/server.js

// //
// require("dotenv").config();

// const express = require("express");
// const cors = require("cors");
// const OpenAI = require("openai");

// const {
//   pipeline,
// } = require("@xenova/transformers");

// const {
//   QdrantClient,
// } = require("@qdrant/js-client-rest");

// const app = express();

// app.use(cors());
// app.use(express.json());

// // ======================
// // AZURE OPENAI
// // ======================

// const azureEndpoint =
//   "https://cpmindiayoda-resource.services.ai.azure.com";

// const deploymentName = "gpt-4o-mini";

// const apiVersion =
//   "2024-08-01-preview";

// const llm = new OpenAI({
//   baseURL:
//     `${azureEndpoint}/openai/deployments/${deploymentName}`,

//   apiKey:
//     process.env.AZURE_OPENAI_KEY,

//   defaultHeaders: {
//     "api-key":
//       process.env.AZURE_OPENAI_KEY,
//   },

//   defaultQuery: {
//     "api-version":
//       apiVersion,
//   },
// });

// // ======================
// // QDRANT
// // ======================

// const qdrant = new QdrantClient({
//   url: "http://20.40.61.65:6333",
//   checkCompatibility: false,
//   timeout: 30000,
// });

// const COLLECTION_NAME =
//   "pdf_rag";

// let embedder;

// // ======================
// // LOAD EMBEDDING MODEL
// // ======================

// async function loadModel() {
//   console.log(
//     "Loading MiniLM model..."
//   );

//   embedder = await pipeline(
//     "feature-extraction",
//     "Xenova/all-MiniLM-L6-v2"
//   );

//   console.log(
//     "Embedding model loaded"
//   );
// }

// // ======================
// // EMBEDDING
// // ======================

// async function createEmbedding(
//   text
// ) {
//   const output =
//     await embedder(text, {
//       pooling: "mean",
//       normalize: true,
//     });

//   return Array.from(output.data);
// }

// // ======================
// // HEALTH
// // ======================

// app.get("/", (req, res) => {
//   res.json({
//     success: true,
//     message:
//       "Qdrant + Azure GPT RAG Running",
//   });
// });

// // ======================
// // ASK API
// // ======================

// app.post(
//   "/ask",
//   async (req, res) => {
//     try {
//       const { question } =
//         req.body;

//       if (!question) {
//         return res
//           .status(400)
//           .json({
//             success: false,
//             error:
//               "Question is required",
//           });
//       }

//       console.log(
//         "Question:",
//         question
//       );

//       // ======================
//       // CREATE EMBEDDING
//       // ======================

//       const embedding =
//         await createEmbedding(
//           question
//         );

//       // ======================
//       // SEARCH QDRANT
//       // ======================

//       const searchResult =
//         await qdrant.search(
//           COLLECTION_NAME,
//           {
//             vector: embedding,
//             limit: 20,
//           }
//         );
//       const filteredResults = searchResult.filter(
//         item => item.score >= 0.10
//       );

//       console.log(
//         "Results:",
//         filteredResults.length,


//       );

//       if (
//         !filteredResults.length
//       ) {
//         return res.json({
//           success: true,
//           answer:
//             "No relevant information found.",
//           sources: [],
//         });
//       }

//       // ======================
//       // CONTEXT
//       // ======================

//       const context =
//         filteredResults
//           .map(
//             (item, index) => `
// Result ${index + 1}

// File:
// ${item.payload?.file || ""}

// Content:
// ${item.payload?.text || ""}
// `
//           )
//           .join("\n\n");

//       // ======================
//       // GPT CALL
//       // ======================

//       const completion =
//         await llm.chat.completions.create(
//           {
//             model:
//               deploymentName,

//             temperature: 0,

//             messages: [
//               {
//                 role: "system",
//                 content: `
// You are CPM AI Assistant.

// Rules:
// - Answer ONLY from the provided context.
// - If information is not found, say:
//   "❌ I could not find this information in the uploaded documents."

// Response Style:
// - Use emojis where appropriate.
// - Use markdown formatting.
// - Use headings.
// - Use bullet points.
// - Make answers professional and easy to read.
// - Highlight important information using **bold** text.
// - Never mention the context or document chunks.

// Example Format:

// # 📋 Dress Code Policy

// ## 🎯 Overview
// Brief summary here.

// ## ✅ Key Points
// • Point 1
// • Point 2
// • Point 3

// ## ⚠️ Important Notes
// • Note 1
// • Note 2

// ## 📝 Conclusion
// Short conclusion.
// `,
//               },
//               {
//                 role: "user",
//                 content: `
// Context:
// ${context}

// Question:
// ${question}
// `,
//               },
//             ]
//           }
//         );

//       const answer =
//         completion.choices[0]
//           .message.content;

//       return res.json({
//         success: true,

//         question,

//         answer,

//         sources:
//           filteredResults.map(
//             (item) => ({
//               score:
//                 item.score,

//               file:
//                 item.payload
//                   ?.file,

//               chunk:
//                 item.payload
//                   ?.chunk,
//             })
//           ),
//       });
//     } catch (error) {
//       console.error(
//         "ERROR:",
//         error
//       );

//       return res
//         .status(500)
//         .json({
//           success: false,
//           error:
//             error.message,
//         });
//     }
//   }
// );

// // ======================
// // START SERVER
// // ======================

// async function startServer() {
//   try {
//     await loadModel();

//     app.listen(
//       process.env.PORT ||
//       5000,
//       () => {
//         console.log(
//           "Server running on port",
//           process.env.PORT ||
//           5000
//         );
//       }
//     );
//   } catch (error) {
//     console.error(
//       "Startup Error:",
//       error
//     );
//   }
// }

// startServer();

// require("dotenv").config();

// const express = require("express");
// const cors = require("cors");
// const OpenAI = require("openai");
// const { pipeline } = require("@xenova/transformers");
// const { QdrantClient } = require("@qdrant/js-client-rest");

// // ─── Config ───────────────────────────────────────────────────────────────────
// const CONFIG = {
//   azure: {
//     endpoint:   process.env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com",
//     deployment: process.env.AZURE_DEPLOYMENT       || "gpt-4o-mini",
//     apiVersion: process.env.AZURE_API_VERSION      || "2024-08-01-preview",
//     apiKey:     process.env.AZURE_OPENAI_KEY,
//   },
//   qdrant: {
//     url:        process.env.QDRANT_URL             || "http://20.40.61.65:6333",
//     collection: process.env.QDRANT_COLLECTION      || "pdf_rag",
//   },
//   search: {
//     topK:           20,
//     minScore:       0.10,
//     maxContextDocs: 10,
//   },
//   port: process.env.PORT || 5000,
// };

// // ─── Clients ──────────────────────────────────────────────────────────────────
// const llm = new OpenAI({
//   baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
//   apiKey:  CONFIG.azure.apiKey,
//   defaultHeaders: { "api-key": CONFIG.azure.apiKey },
//   defaultQuery:   { "api-version": CONFIG.azure.apiVersion },
// });

// const qdrant = new QdrantClient({
//   url:                CONFIG.qdrant.url,
//   checkCompatibility: false,
//   timeout:            30000,
// });

// // ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
// let _embedder = null;
// async function getEmbedder() {
//   if (!_embedder) {
//     console.log("⏳ Loading MiniLM model...");
//     _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
//     console.log("✅ Embedding model ready");
//   }
//   return _embedder;
// }

// async function createEmbedding(text) {
//   const model = await getEmbedder();
//   const out = await model(text, { pooling: "mean", normalize: true });
//   return Array.from(out.data);
// }

// // ─── Qdrant search ────────────────────────────────────────────────────────────
// async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
//   const results = await qdrant.search(CONFIG.qdrant.collection, {
//     vector:      embedding,
//     limit:       topK,
//     with_payload: true,
//     score_threshold: minScore, // let Qdrant filter — faster than client-side
//   });

//   // Re-rank by score, cap to maxContextDocs
//   return results
//     .sort((a, b) => b.score - a.score)
//     .slice(0, maxContextDocs);
// }

// // ─── Build LLM context string ─────────────────────────────────────────────────
// function buildContext(results) {
//   return results
//     .map((item, i) =>
//       `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
//     )
//     .join("\n\n---\n\n");
// }

// // ─── LLM call ─────────────────────────────────────────────────────────────────
// const SYSTEM_PROMPT = `
// You are CPM AI Assistant.

// Rules:
// - Answer only from the provided information.
// - If the answer is not available, reply exactly:
//   "❌ I could not find this information in the uploaded documents."
// - Do not make up information.
// - Do not mention documents, context, or chunks.

// Response Style:
// - Use simple English.
// - Keep answers short and clear.
// - Use headings and bullet points.
// - Highlight important words in **bold**.
// - Use emojis in headings.

// Format:

// # 📋 Topic

// ## 🎯 Summary
// Short answer in 1-2 sentences.

// ## ✅ Details
// - Point 1
// - Point 2
// - Point 3

// ## ⚠️ Notes
// - Extra information (if available).
// `.trim();

// async function askLLM(question, context) {
//   const completion = await llm.chat.completions.create({
//     model:       CONFIG.azure.deployment,
//     temperature: 0,
//     max_tokens:  1500,
//     messages: [
//       { role: "system", content: SYSTEM_PROMPT },
//       { role: "user",   content: `Context:\n${context}\n\nQuestion:\n${question}` },
//     ],
//   });

//   return completion.choices[0].message.content;
// }

// // ─── Express app ──────────────────────────────────────────────────────────────
// const app = express();
// app.use(cors());
// app.use(express.json({ limit: "1mb" }));

// // Request logger middleware
// app.use((req, _res, next) => {
//   console.log(`→ ${req.method} ${req.path}`);
//   next();
// });

// // ─── Routes ───────────────────────────────────────────────────────────────────
// app.get("/health", (_req, res) => {
//   res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection });
// });

// app.post("/ask", async (req, res) => {
//   const { question } = req.body ?? {};

//   if (!question?.trim()) {
//     return res.status(400).json({ success: false, error: "question is required" });
//   }

//   const t0 = Date.now();

//   try {
//     // 1. Embed question
//     const embedding = await createEmbedding(question.trim());

//     // 2. Semantic search
//     const results = await searchQdrant(embedding);

//     if (!results.length) {
//       return res.json({
//         success: true,
//         question,
//         answer:  "❌ I could not find this information in the uploaded documents.",
//         sources: [],
//         ms:      Date.now() - t0,
//       });
//     }

//     // 3. Build context + call LLM
//     const context = buildContext(results);
//     const answer  = await askLLM(question, context);

//     return res.json({
//       success: true,
//       question,
//       answer,
//       sources: results.map(r => ({
//         score: +r.score.toFixed(4),
//         file:  r.payload?.file,
//         page:  r.payload?.page,
//         chunk: r.payload?.chunk,
//       })),
//       ms: Date.now() - t0,
//     });

//   } catch (err) {
//     console.error("❌ /ask error:", err);
//     return res.status(500).json({ success: false, error: err.message });
//   }
// });

// app.post("/ask/stream", async (req, res) => {
//   const { question } = req.body ?? {};

//   if (!question?.trim()) {
//     return res.status(400).json({ success: false, error: "question is required" });
//   }

//   // ── SSE headers ────────────────────────────────────────────────────────────
//   res.setHeader("Content-Type",  "text/event-stream");
//   res.setHeader("Cache-Control", "no-cache");
//   res.setHeader("Connection",    "keep-alive");
//   res.flushHeaders(); // send headers immediately

//   const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);

//   try {
//     // 1. Embed
//     send("status", { message: "🔍 Searching documents..." });
//     const embedding = await createEmbedding(question.trim());

//     // 2. Search Qdrant
//     const results = await searchQdrant(embedding);

//     if (!results.length) {
//       send("token",  { token: "❌ I could not find this information in the uploaded documents." });
//       send("done",   { sources: [] });
//       return res.end();
//     }

//     // 3. Send sources early so UI can show them while streaming answer
//     const sources = results.map(r => ({
//       score: +r.score.toFixed(4),
//       file:  r.payload?.file,
//       page:  r.payload?.page,
//       chunk: r.payload?.chunk,
//     }));
//     send("sources", { sources });

//     // 4. Stream LLM tokens
//     send("status", { message: "💬 Generating answer..." });

//     const context = buildContext(results);
//     const stream  = await llm.chat.completions.create({
//       model:       CONFIG.azure.deployment,
//       temperature: 0,
//       max_tokens:  1500,
//       stream:      true,   // ← key change
//       messages: [
//         { role: "system", content: SYSTEM_PROMPT },
//         { role: "user",   content: `Context:\n${context}\n\nQuestion:\n${question}` },
//       ],
//     });

//     for await (const chunk of stream) {
//       const token = chunk.choices[0]?.delta?.content ?? "";
//       if (token) send("token", { token });
//     }

//     send("done", { sources });

//   } catch (err) {
//     console.error("❌ /ask/stream error:", err);
//     send("error", { error: err.message });
//   }

//   res.end();
// });


// app.use((_req, res) => res.status(404).json({ success: false, error: "Not found" }));

// // ─── Start ────────────────────────────────────────────────────────────────────
// async function start() {
//   await getEmbedder();

//   app.listen(CONFIG.port, () => {
//     console.log(`Server running on port ${CONFIG.port}`);
//   });
// }

// start().catch(err => {
//   console.error("Fatal startup error:", err);
//   process.exit(1);
// });


require("dotenv").config();

const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");


// ─── Config ───────────────────────────────────────────────────────────────────
/**
 * Runtime configuration for the whole server.
 *
 * Every `process.env` lookup falls back to the literal next to it when the
 * variable is unset or empty (`||`). `AZURE_OPENAI_KEY` has no fallback and is
 * `undefined` when not provided. The `search` values are fixed in code:
 * `searchQdrant` uses `topK` as the Qdrant result limit, `minScore` as the
 * score threshold and `maxContextDocs` as the cap on returned hits.
 */
const CONFIG = (() => {
  const { env } = process;

  const azure = {
    endpoint: env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com",
    deployment: env.AZURE_DEPLOYMENT || "gpt-4o-mini",
    apiVersion: env.AZURE_API_VERSION || "2024-08-01-preview",
    apiKey: env.AZURE_OPENAI_KEY,
  };

  const qdrant = {
    url: env.QDRANT_URL || "http://20.40.61.65:6333",
    collection: env.QDRANT_COLLECTION || "pdf_rag",
  };

  const search = {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  };

  // PORT stays a string when it comes from the environment; 5000 is the numeric fallback.
  const port = env.PORT || 5000;

  return { azure, qdrant, search, port };
})();

// ─── Clients ──────────────────────────────────────────────────────────────────
// Chat-completions client built from CONFIG.azure. The deployment name is part
// of the base URL; the key is supplied both as `apiKey` and as an "api-key"
// default header, and the API version travels as a default query parameter.
const llm = (() => {
  const { endpoint, deployment, apiKey, apiVersion } = CONFIG.azure;

  return new OpenAI({
    baseURL: `${endpoint}/openai/deployments/${deployment}`,
    apiKey,
    defaultHeaders: { "api-key": apiKey },
    defaultQuery: { "api-version": apiVersion },
  });
})();

// Qdrant REST client pointed at CONFIG.qdrant.url.
// NOTE(review): `checkCompatibility: false` presumably skips the client/server
// version check and `timeout` is presumably in milliseconds — confirm against
// the @qdrant/js-client-rest documentation.
const qdrant = (() => {
  const { url } = CONFIG.qdrant;
  const options = { url, checkCompatibility: false, timeout: 30000 };

  return new QdrantClient(options);
})();

// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
// Loaded feature-extraction pipeline, or null until the first load finishes.
let _embedder = null;
// Promise of the load currently in flight, shared by all concurrent callers.
let _embedderLoad = null;

/**
 * Returns the shared MiniLM feature-extraction pipeline, loading it on first use.
 *
 * Callers that arrive while the model is still loading all await the same
 * promise, so `pipeline()` runs at most once per successful load. When a load
 * fails, the cached promise is dropped so that a later call can retry.
 *
 * @returns {Promise<Function>} the pipeline returned by `pipeline(...)`.
 * @throws whatever `pipeline(...)` rejects with.
 */
async function getEmbedder() {
  if (_embedder) {
    return _embedder;
  }

  if (!_embedderLoad) {
    console.log("⏳ Loading MiniLM model...");

    const load = (async () => {
      const model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
      _embedder = model;
      console.log("✅ Embedding model ready");
      return model;
    })();

    _embedderLoad = load;

    // Forget a failed load; callers still see the rejection through `load` itself.
    load.catch(() => {
      if (_embedderLoad === load) {
        _embedderLoad = null;
      }
    });
  }

  return _embedderLoad;
}

/**
 * Embeds a piece of text with the shared feature-extraction pipeline.
 *
 * The pipeline is invoked with mean pooling and normalization enabled, and the
 * `data` of its output is copied into a plain array.
 *
 * @param {string} text - text to embed.
 * @returns {Promise<number[]>} the embedding values as a plain array.
 */
async function createEmbedding(text) {
  const embed = await getEmbedder();
  const poolingOptions = { pooling: "mean", normalize: true };
  const { data } = await embed(text, poolingOptions);

  return Array.from(data);
}

// ─── Qdrant search ────────────────────────────────────────────────────────────
/**
 * Runs a vector search against the configured Qdrant collection.
 *
 * Each option falls back to its own value in `CONFIG.search`, so a caller may
 * override just one of them (e.g. `{ topK: 5 }`) without losing the others.
 *
 * @param {number[]} embedding - query vector.
 * @param {object} [options]
 * @param {number} [options.topK] - passed to Qdrant as `limit`.
 * @param {number} [options.minScore] - passed to Qdrant as `score_threshold`.
 * @param {number} [options.maxContextDocs] - maximum number of hits returned.
 * @returns {Promise<object[]>} hits ordered by descending `score`, at most
 *   `maxContextDocs` of them.
 */
async function searchQdrant(
  embedding,
  {
    topK = CONFIG.search.topK,
    minScore = CONFIG.search.minScore,
    maxContextDocs = CONFIG.search.maxContextDocs,
  } = {},
) {
  const hits = await qdrant.search(CONFIG.qdrant.collection, {
    vector: embedding,
    limit: topK,
    with_payload: true,
    score_threshold: minScore,
  });

  // Highest score first, then keep only as many hits as the prompt should carry.
  return hits
    .sort((a, b) => b.score - a.score)
    .slice(0, maxContextDocs);
}

// ─── Build LLM context string ─────────────────────────────────────────────────
/**
 * Renders search hits as the context text placed in the LLM prompt.
 *
 * Each hit becomes a numbered header line (file and page from its payload)
 * followed by the payload text. Missing file, page or text fall back to
 * "unknown", "?" and "" respectively. Entries are separated by a "---" rule.
 *
 * @param {object[]} results - hits whose optional `payload` may carry
 *   `file`, `page` and `text`.
 * @returns {string} the joined context; "" for an empty list.
 */
function buildContext(results) {
  const separator = "\n\n---\n\n";

  const renderEntry = ({ payload }, position) => {
    const file = payload?.file ?? "unknown";
    const page = payload?.page ?? "?";
    const text = payload?.text ?? "";

    return `[${position + 1}] File: ${file} | Page: ${page}\n${text}`;
  };

  return results.map(renderEntry).join(separator);
}

// ─── LLM call ─────────────────────────────────────────────────────────────────
// System message used as the first chat message by both askLLM and the
// /ask/stream handler. It restricts answers to the supplied context, fixes the
// exact fallback sentence (the same sentence the routes return themselves when
// the search yields no hits) and prescribes the markdown layout of the answer.
// The trailing .trim() removes the newline that follows the opening backtick
// and the one that precedes the closing backtick.
const SYSTEM_PROMPT = `
You are CPM AI Assistant.

Rules:
- Answer only from the provided information.
- If the answer is not available, reply exactly:
  "❌ I could not find this information in the uploaded documents."
- Do not make up information.
- Do not mention documents, context, or chunks.

Response Style:
- Use simple English.
- Keep answers short and clear.
- Use headings and bullet points.
- Highlight important words in **bold**.
- Use emojis in headings.

Format:

# 📋 Topic

## 🎯 Summary
Short answer in 1-2 sentences.

## ✅ Details
- Point 1
- Point 2
- Point 3

## ⚠️ Notes
- Extra information (if available).
`.trim();

/**
 * Asks the chat model to answer a question from the given context.
 *
 * Sends SYSTEM_PROMPT as the system message and a user message of the form
 * "Context:\n…\n\nQuestion:\n…", with temperature 0 and a 1500-token cap.
 *
 * @param {string} question - the user's question.
 * @param {string} context - context text, e.g. the output of buildContext.
 * @returns {Promise<string>} the content of the first returned choice.
 */
async function askLLM(question, context) {
  const messages = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
  ];

  const request = {
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages,
  };

  const { choices } = await llm.chat.completions.create(request);

  return choices[0].message.content;
}

// ─── Express app ──────────────────────────────────────────────────────────────
const app = express();

/**
 * Logs the method and path of every incoming request, then passes control on.
 */
function logRequest(req, _res, next) {
  console.log(`→ ${req.method} ${req.path}`);
  next();
}

// Global middleware, registered in this order. cors() is called without
// options (NOTE(review): presumably the package's permissive defaults —
// confirm this is intended). JSON bodies are parsed with a "1mb" limit;
// URL-encoded bodies use the extended parser.
const globalMiddleware = [
  cors(),
  express.json({ limit: "1mb" }),
  express.urlencoded({ extended: true }),
  logRequest,
];

for (const middleware of globalMiddleware) {
  app.use(middleware);
}

// ─── Routes ───────────────────────────────────────────────────────────────────
// GET /health — reports a static "ok" status together with the configured
// deployment name and Qdrant collection. It does not contact either service.
app.get("/health", (_req, res) => {
  const { deployment: model } = CONFIG.azure;
  const { collection } = CONFIG.qdrant;

  res.json({ status: "ok", model, collection });
});

/**
 * POST /ask — answers a question in a single JSON response.
 *
 * Request body: `{ question: string }`.
 * Responses:
 *   400 `{ success: false, error }` when `question` is missing, not a string,
 *       or blank.
 *   200 `{ success: true, question, answer, sources, ms }` otherwise; when the
 *       search yields no hits, `answer` is the fixed "could not find" sentence
 *       and `sources` is empty.
 *   500 `{ success: false, error }` when embedding, search or the LLM call fails.
 */
app.post("/ask", async (req, res) => {
  const { question } = req.body ?? {};

  // Check the type explicitly: calling .trim() on a number or object would
  // throw here, outside the try/catch, and reject the handler's promise
  // instead of producing a 400.
  if (typeof question !== "string" || question.trim() === "") {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();

  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      return res.json({
        success: true,
        question,
        answer:  "❌ I could not find this information in the uploaded documents.",
        sources: [],
        ms:      Date.now() - t0,
      });
    }

    const context = buildContext(results);
    const answer  = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: results.map((r) => ({
        // Rounded to four decimals to keep the payload compact.
        score: +r.score.toFixed(4),
        file:  r.payload?.file,
        page:  r.payload?.page,
        chunk: r.payload?.chunk,
      })),
      ms: Date.now() - t0,
    });
  } catch (err) {
    console.error("❌ /ask error:", err);
    // NOTE(review): the raw error message is returned to the client — confirm
    // that exposing upstream error details is acceptable.
    return res.status(500).json({ success: false, error: err.message });
  }
});

// ─── /ask/stream — word-by-word SSE ──────────────────────────────────────────
// The LLM streams tokens (which may be partial words or multi-word chunks).
// Incoming text is buffered and split on whitespace: every complete word and
// every whitespace run becomes its own SSE "token" event, so the frontend can
// animate words one-by-one and still rebuild the exact text by concatenating
// the tokens in arrival order.

/**
 * Creates a buffer that turns arbitrary text fragments into word-level parts.
 *
 * @param {(part: string, isWord: boolean) => void} emit - called once for each
 *   complete word (`isWord` true) and each whitespace run (`isWord` false).
 * @returns {{ push: (text: string) => void, flush: () => void }} `push` feeds
 *   a fragment; `flush` emits whatever word is still buffered.
 */
function createWordBuffer(emit) {
  let pending = "";

  return {
    push(text) {
      // The capture group keeps the whitespace runs in the split result.
      const parts = (pending + text).split(/(\s+)/);

      // The last part may be a word that continues in the next fragment.
      pending = parts.pop() ?? "";

      for (const part of parts) {
        if (part) {
          emit(part, /\S/.test(part));
        }
      }
    },
    flush() {
      // After a split the buffered remainder never contains whitespace.
      if (pending) {
        emit(pending, true);
      }
      pending = "";
    },
  };
}

/**
 * POST /ask/stream — answers a question as a Server-Sent Events stream.
 *
 * Request body: `{ question: string }`. A missing, non-string or blank
 * question gets a regular 400 JSON response. Otherwise the events are:
 *   status  `{ message }`          progress notes
 *   sources `{ sources }`          search hits, sent before the answer starts
 *   token   `{ token, isWord }`    one word or one whitespace run
 *   done    `{ sources }`          end of a successful answer
 *   error   `{ error }`            failure after the stream was opened
 */
app.post("/ask/stream", async (req, res) => {
  const { question } = req.body ?? {};

  // Check the type explicitly: calling .trim() on a number or object would
  // throw here, outside the try/catch, and reject the handler's promise
  // instead of producing a 400.
  if (typeof question !== "string" || question.trim() === "") {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  res.setHeader("Content-Type",  "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection",    "keep-alive");
  res.flushHeaders();

  // Stop producing output once the connection is gone.
  let clientGone = false;
  res.on("close", () => {
    clientGone = true;
  });

  const send = (event, data) => {
    if (!clientGone) {
      res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
    }
  };
  const words = createWordBuffer((token, isWord) => send("token", { token, isWord }));

  try {
    send("status", { message: "🔍 Searching documents..." });
    const embedding = await createEmbedding(question.trim());

    const results = await searchQdrant(embedding);

    if (!results.length) {
      // Goes through the same splitter as streamed answers, so the whitespace
      // tokens between the words are emitted here as well.
      words.push("❌ I could not find this information in the uploaded documents.");
      words.flush();
      send("done", { sources: [] });
      return;
    }

    const sources = results.map((r) => ({
      score: +r.score.toFixed(4),
      file:  r.payload?.file,
      page:  r.payload?.page,
      chunk: r.payload?.chunk,
    }));
    send("sources", { sources });

    send("status", { message: "💬 Generating answer..." });

    const context = buildContext(results);
    const stream  = await llm.chat.completions.create({
      model:       CONFIG.azure.deployment,
      temperature: 0,
      max_tokens:  1500,
      stream:      true,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user",   content: `Context:\n${context}\n\nQuestion:\n${question}` },
      ],
    });

    for await (const chunk of stream) {
      // Leaving the loop closes the async iterator; the openai SDK documents
      // breaking out of the loop as a way to cancel the stream.
      if (clientGone) {
        break;
      }

      // Some chunks carry no choices or no text delta; skip those.
      const rawToken = chunk.choices?.[0]?.delta?.content ?? "";
      if (rawToken) {
        words.push(rawToken);
      }
    }

    words.flush();
    send("done", { sources });
  } catch (err) {
    console.error("❌ /ask/stream error:", err);
    // NOTE(review): the raw error message is sent to the client — confirm
    // that exposing upstream error details is acceptable.
    send("error", { error: err.message });
  } finally {
    // Always close the response, including the early "no results" return.
    res.end();
  }
});

// Fallback registered after the routes above: any request that reaches it
// gets a 404 JSON body.
app.use((_req, res) => {
  const body = { success: false, error: "Not found" };

  return res.status(404).json(body);
});

// ─── Start ────────────────────────────────────────────────────────────────────
/**
 * Boots the server: loads the embedding model first, then starts listening on
 * CONFIG.port, so no request is accepted before the model is available.
 *
 * @returns {Promise<void>} settles once `app.listen` has been called; rejects
 *   if loading the model fails.
 */
async function start() {
  await getEmbedder();

  const { port } = CONFIG;
  const announce = () => {
    console.log(`Server running on port ${port}`);
  };

  app.listen(port, announce);
}

// A failure during startup is logged and ends the process with exit code 1.
const reportFatalStartupError = (err) => {
  console.error("Fatal startup error:", err);
  process.exit(1);
};

start().catch(reportFatalStartupError);