gyanBuddy/controller/askQuestion.js

require("dotenv").config();
const postgre = require('../database/postgre');
const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");

const CONFIG = {
  azure: {
    endpoint: process.env.AZURE_OPENAI_ENDPOINT,
    deployment: process.env.AZURE_DEPLOYMENT,
    apiVersion: process.env.AZURE_API_VERSION,
    apiKey: process.env.AZURE_OPENAI_KEY,
  },
  qdrant: {
    url: process.env.QDRANT_URL,
    collection: process.env.QDRANT_COLLECTION,
  },
  search: {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  },
  port: process.env.PORT || 5000,
};

// ─── Clients ──────────────────────────────────────────────────────────────────
const llm = new OpenAI({
  baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
  apiKey: CONFIG.azure.apiKey,
  defaultHeaders: { "api-key": CONFIG.azure.apiKey },
  defaultQuery: { "api-version": CONFIG.azure.apiVersion },
});

const qdrant = new QdrantClient({
  url: CONFIG.qdrant.url,
  checkCompatibility: false,
  timeout: 30000,
});

// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
let _embedder = null;
async function getEmbedder() {
  if (!_embedder) {
    console.log("Loading MiniLM model...");
    _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
    console.log("Embedding model ready");
  }
  return _embedder;
}

async function createEmbedding(text) {
  const model = await getEmbedder();
  const out = await model(text, { pooling: "mean", normalize: true });
  return Array.from(out.data);
}

// ─── Qdrant search ────────────────────────────────────────────────────────────
async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
  const results = await qdrant.search(CONFIG.qdrant.collection, {
    vector: embedding,
    limit: topK,
    with_payload: true,
    score_threshold: minScore,
  });

  console.log(`Qdrant returned ${results.length} results (threshold: ${minScore})`);
  return results
    .sort((a, b) => b.score - a.score)
    .slice(0, maxContextDocs);
}

// ─── Build LLM context string ─────────────────────────────────────────────────
function buildContext(results) {
  return results
    .map((item, i) =>
      `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
    )
    .join("\n\n---\n\n");
}

const SYSTEM_PROMPT = `
You are CPM AI Assistant.

Rules:
- Answer only from the provided information.
- If the answer is not available, reply exactly:
  "❌ I could not find this information in the uploaded documents."
- Do not make up information.
- Do not mention documents, context, or chunks.
- Reply in the same language and style as the user's question.
- If the user asks in Hindi, answer in Hindi.
- If the user asks in Hinglish, answer in Hinglish.
- If the user asks in English, answer in English.

Response Style:
- Use simple and easy-to-understand language.
- Keep answers short and clear.
- Use headings and bullet points when helpful.
- Highlight important words in **bold**.

Format:

# 📋 Topic

## 🎯 Summary
Short answer in the user's language.

## ✅ Details
- Point 1
- Point 2
- Point 3

## ⚠️ Notes
- Extra information (if available).
`.trim();

async function askLLM(question, context) {
  const completion = await llm.chat.completions.create({
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
    ],
  });

  return completion.choices[0].message.content;
}

const health = async (req, res) => {
  res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection });
}

const ask = async (req, res) => {


  const { question } = req.body ?? {};

  if (!question?.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();

  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      return res.json({
        success: true,
        question,
        answer: "❌ I could not find this information in the uploaded documents.",
        sources: [],
        ms: Date.now() - t0,
      });
    }

    const context = buildContext(results);
    const answer = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: results.map(r => ({
        score: +r.score.toFixed(4),
        file: r.payload?.file,
        page: r.payload?.page,
        chunk: r.payload?.chunk,
      })),
      ms: Date.now() - t0,
    });

  } catch (err) {
    console.error("❌ /ask error:", err);
    return res.status(500).json({ success: false, error: err.message });
  }
}
const askstream = async (req, res) => {

  const { question } = req.body ?? {};
  const user_id = req.user.id;
  console.log("Received question:", user_id);

  if (!question?.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }


  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders();

  const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);

  try {
    send("status", { message: "🔍 Searching documents..." });
    const embedding = await createEmbedding(question.trim());

    const results = await searchQdrant(embedding);


    if (!results.length) {
      send("token", { token: "❌", isWord: true });
      send("token", { token: "I", isWord: true });
      send("token", { token: "could", isWord: true });
      send("token", { token: "not", isWord: true });
      send("token", { token: "find", isWord: true });
      send("token", { token: "this", isWord: true });
      send("token", { token: "information", isWord: true });
      send("token", { token: "in", isWord: true });
      send("token", { token: "the", isWord: true });
      send("token", { token: "uploaded", isWord: true });
      send("token", { token: "documents.", isWord: true });
      send("done", { sources: [] });

      const result = await postgre.query(
        'insert into useraskquestion (user_id, questions) values ($1, $2)',
        [user_id, question]
      );

      return res.end();
    }

      const result = await postgre.query(
        'insert into useraskquestion (user_id, questions,status) values ($1, $2, $3)',
        [user_id, question,'1']
      );

    const sources = results.map(r => ({
      score: +r.score.toFixed(4),
      file: r.payload?.file,
      page: r.payload?.page,
      chunk: r.payload?.chunk,
    }));
    send("sources", { sources });

    send("status", { message: "💬 Generating answer..." });

    const context = buildContext(results);
    const stream = await llm.chat.completions.create({
      model: CONFIG.azure.deployment,
      temperature: 0,
      max_tokens: 1500,
      stream: true,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
      ],
    });

    // Buffer to handle tokens that may be split mid-word
    let wordBuffer = "";

    for await (const chunk of stream) {
      const rawToken = chunk.choices[0]?.delta?.content ?? "";
      if (!rawToken) continue;

      wordBuffer += rawToken;

      // Split on whitespace — emit complete words, keep trailing partial
      // We preserve newlines/markdown as separate tokens so markdown renders correctly
      const parts = wordBuffer.split(/(\s+)/);

      // Last element might be an incomplete word — buffer it
      wordBuffer = parts.pop() ?? "";

      for (const part of parts) {
        if (part) {
          send("token", { token: part, isWord: /\S/.test(part) });
        }
      }
    }

    // Flush any remaining buffered text
    if (wordBuffer) {
      send("token", { token: wordBuffer, isWord: true });
    }

    send("done", { sources });

  } catch (err) {
    console.error("❌ /ask/stream error:", err);
    send("error", { error: err.message });
  }

  res.end();
}

module.exports = { ask, askstream, health };