// gyanBuddy/server.js

require("dotenv").config();

const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");

/**
 * Reads an environment variable, using `fallback` when the variable is unset
 * or empty (any falsy value falls through, matching `process.env.X || fallback`).
 */
const envOr = (name, fallback) => process.env[name] || fallback;

/**
 * Runtime configuration for the server.
 *
 * - `azure`:  endpoint, deployment name, API version and key for the chat model.
 * - `qdrant`: URL and collection name of the vector store.
 * - `search`: retrieval knobs — how many hits to request (`topK`), the minimum
 *             similarity score to accept (`minScore`) and how many hits are
 *             kept for the LLM context (`maxContextDocs`).
 * - `port`:   port passed to `app.listen`.
 *
 * `azure.apiKey` has no fallback; it is `undefined` when AZURE_OPENAI_KEY is unset.
 */
const CONFIG = {
  azure: {
    endpoint: envOr("AZURE_OPENAI_ENDPOINT", "https://cpmindiayoda-resource.services.ai.azure.com"),
    deployment: envOr("AZURE_DEPLOYMENT", "gpt-4o-mini"),
    apiVersion: envOr("AZURE_API_VERSION", "2024-08-01-preview"),
    apiKey: process.env.AZURE_OPENAI_KEY,
  },
  qdrant: {
    url: envOr("QDRANT_URL", "http://20.40.61.65:6333"),
    collection: envOr("QDRANT_COLLECTION", "pdf_rag"),
  },
  search: {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  },
  port: envOr("PORT", 5000),
};

// ─── Clients ──────────────────────────────────────────────────────────────────
// Chat-completions client pointed at the Azure OpenAI deployment from CONFIG.
// The key is sent both as `apiKey` and as an explicit "api-key" header, and the
// API version is appended to every request as a query parameter.
const llm = new OpenAI({
  // Trailing slashes are stripped from the configured endpoint so that a value
  // such as "https://host/" does not yield "https://host//openai/deployments/...".
  baseURL: `${CONFIG.azure.endpoint.replace(/\/+$/, "")}/openai/deployments/${CONFIG.azure.deployment}`,
  apiKey:  CONFIG.azure.apiKey,
  defaultHeaders: { "api-key": CONFIG.azure.apiKey },
  defaultQuery:   { "api-version": CONFIG.azure.apiVersion },
});

// Connection settings for the Qdrant vector store.
// NOTE(review): `checkCompatibility: false` presumably skips the client/server
// version check and `timeout` is presumably in milliseconds — confirm against
// the @qdrant/js-client-rest documentation.
const qdrantOptions = {
  url: CONFIG.qdrant.url,
  checkCompatibility: false,
  timeout: 30000,
};

const qdrant = new QdrantClient(qdrantOptions);

// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
let _embedder = null;
// In-flight load shared by every caller that arrives before the model is ready.
let _embedderLoading = null;

/**
 * Loads the MiniLM feature-extraction pipeline.
 *
 * @returns {Promise<Function>} the pipeline returned by `pipeline(...)`.
 */
async function loadEmbedder() {
  console.log("Loading MiniLM model...");
  const model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  console.log("Embedding model ready");
  return model;
}

/**
 * Returns the shared embedding pipeline, loading it on first use.
 *
 * Concurrent callers share a single in-flight load instead of each starting
 * their own. If the load fails, the cached promise is cleared so that a later
 * call can retry.
 *
 * @returns {Promise<Function>} the loaded embedding pipeline.
 * @throws whatever `pipeline(...)` rejects with when the model cannot be loaded.
 */
async function getEmbedder() {
  if (!_embedder) {
    if (!_embedderLoading) {
      _embedderLoading = loadEmbedder().catch((err) => {
        // Forget the failed attempt; otherwise every future call would
        // receive the same rejected promise.
        _embedderLoading = null;
        throw err;
      });
    }
    _embedder = await _embedderLoading;
  }
  return _embedder;
}

/**
 * Embeds `text` with the shared embedding pipeline.
 *
 * The pipeline is invoked with mean pooling and normalization enabled, and the
 * `data` of its output is copied into a plain array.
 *
 * @param {string} text - text to embed.
 * @returns {Promise<number[]>} the embedding as a plain array of numbers.
 */
async function createEmbedding(text) {
  const embed = await getEmbedder();
  const pipelineOptions = { pooling: "mean", normalize: true };
  const { data } = await embed(text, pipelineOptions);
  return Array.from(data);
}

// ─── Qdrant search ────────────────────────────────────────────────────────────
/**
 * Runs a vector search against the configured Qdrant collection.
 *
 * Each option defaults individually to its value in `CONFIG.search`, so a
 * caller may override just one of them (e.g. `{ topK: 5 }`) without losing the
 * score threshold or the result cap.
 *
 * @param {number[]} embedding - query vector.
 * @param {object} [options]
 * @param {number} [options.topK] - number of hits requested from Qdrant.
 * @param {number} [options.minScore] - score threshold passed to Qdrant.
 * @param {number} [options.maxContextDocs] - maximum number of hits returned.
 * @returns {Promise<object[]>} hits ordered by descending score, at most
 *   `maxContextDocs` of them.
 */
async function searchQdrant(
  embedding,
  {
    topK = CONFIG.search.topK,
    minScore = CONFIG.search.minScore,
    maxContextDocs = CONFIG.search.maxContextDocs,
  } = {}
) {
  const results = await qdrant.search(CONFIG.qdrant.collection, {
    vector:      embedding,
    limit:       topK,
    with_payload: true,
    score_threshold: minScore,
  });

  // Sort a copy so the array handed back by the client is not mutated.
  return [...results]
    .sort((a, b) => b.score - a.score)
    .slice(0, maxContextDocs);
}

// ─── Build LLM context string ─────────────────────────────────────────────────
/**
 * Renders search hits as the context string sent to the LLM.
 *
 * Each hit becomes a numbered section (starting at 1) with a header naming its
 * file and page, followed by its text. Missing payload fields fall back to
 * "unknown" (file), "?" (page) and "" (text). Sections are separated by a
 * "---" rule surrounded by blank lines.
 *
 * @param {object[]} results - search hits; each may carry a `payload`.
 * @returns {string} the formatted context ("" for an empty list).
 */
function buildContext(results) {
  const SECTION_SEPARATOR = "\n\n---\n\n";

  const formatHit = (hit, index) => {
    const payload = hit.payload;
    const file = payload?.file ?? "unknown";
    const page = payload?.page ?? "?";
    const text = payload?.text ?? "";
    return `[${index + 1}] File: ${file} | Page: ${page}\n${text}`;
  };

  const sections = results.map(formatHit);
  return sections.join(SECTION_SEPARATOR);
}

// ─── LLM call ─────────────────────────────────────────────────────────────────
// System prompt sent with every chat request: it restricts answers to the
// supplied information, fixes the exact "not found" reply, and prescribes the
// markdown layout of the response. Kept as one entry per line of the prompt.
const SYSTEM_PROMPT = [
  "You are CPM AI Assistant.",
  "",
  "Rules:",
  "- Answer only from the provided information.",
  "- If the answer is not available, reply exactly:",
  '  "❌ I could not find this information in the uploaded documents."',
  "- Do not make up information.",
  "- Do not mention documents, context, or chunks.",
  "",
  "Response Style:",
  "- Use simple English.",
  "- Keep answers short and clear.",
  "- Use headings and bullet points.",
  "- Highlight important words in **bold**.",
  "- Use emojis in headings.",
  "",
  "Format:",
  "",
  "# 📋 Topic",
  "",
  "## 🎯 Summary",
  "Short answer in 1-2 sentences.",
  "",
  "## ✅ Details",
  "- Point 1",
  "- Point 2",
  "- Point 3",
  "",
  "## ⚠️ Notes",
  "- Extra information (if available).",
].join("\n");

/**
 * Asks the chat model to answer `question` using `context`.
 *
 * Sends SYSTEM_PROMPT as the system message and a user message containing the
 * context followed by the question, with temperature 0 and a 1500-token cap.
 *
 * @param {string} question - the user's question.
 * @param {string} context - retrieved text the answer must be based on.
 * @returns {Promise<string>} content of the first choice's message.
 */
async function askLLM(question, context) {
  const userMessage = `Context:\n${context}\n\nQuestion:\n${question}`;

  const request = {
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userMessage },
    ],
  };

  const { choices } = await llm.chat.completions.create(request);
  const firstChoice = choices[0];
  return firstChoice.message.content;
}

// ─── Express app ──────────────────────────────────────────────────────────────
const app = express();

/**
 * Logs the HTTP method and path of each incoming request, then passes control
 * to the next middleware.
 */
function logRequest(req, _res, next) {
  console.log(`→ ${req.method} ${req.path}`);
  next();
}

// Middleware, in registration order: CORS with default options, JSON bodies
// (limit "1mb"), URL-encoded bodies, then the request logger.
app.use(cors());
app.use(express.json({ limit: "1mb" }));
app.use(express.urlencoded({ extended: true }));
app.use(logRequest);

// ─── Routes ───────────────────────────────────────────────────────────────────
// GET /health — reports a static "ok" status together with the configured
// chat deployment and Qdrant collection names. No downstream service is probed.
app.get("/health", (_req, res) => {
  const { deployment: model } = CONFIG.azure;
  const { collection } = CONFIG.qdrant;
  res.json({ status: "ok", model, collection });
});

/**
 * POST /ask — answers a question from the indexed documents in one response.
 *
 * Body: `{ question: string }`.
 * Flow: embed the trimmed question → search Qdrant → if nothing matches, reply
 * with the fixed "not found" answer; otherwise build the context and ask the
 * LLM.
 *
 * Responses:
 * - 400 `{ success: false, error }` when `question` is missing, not a string,
 *   or blank.
 * - 200 `{ success: true, question, answer, sources, ms }` where `ms` is the
 *   elapsed handling time.
 * - 500 `{ success: false, error }` when embedding, search or the LLM call throws.
 */
app.post("/ask", async (req, res) => {
  const { question } = req.body ?? {};

  // A non-string `question` (e.g. a number or object in the JSON body) has no
  // callable .trim(); calling it would throw here, before the try block, and
  // nothing in this file handles that rejection. Reject such bodies as invalid.
  if (typeof question !== "string" || !question.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();

  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    // Nothing cleared the score threshold: skip the LLM call entirely.
    if (!results.length) {
      return res.json({
        success: true,
        question,
        answer:  "❌ I could not find this information in the uploaded documents.",
        sources: [],
        ms:      Date.now() - t0,
      });
    }

    const context = buildContext(results);
    const answer  = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: results.map(r => ({
        score: +r.score.toFixed(4), // rounded to 4 decimals, kept numeric
        file:  r.payload?.file,
        page:  r.payload?.page,
        chunk: r.payload?.chunk,
      })),
      ms: Date.now() - t0,
    });

  } catch (err) {
    console.error("❌ /ask error:", err);
    return res.status(500).json({ success: false, error: err.message });
  }
});

/**
 * POST /ask/stream — same pipeline as /ask, streamed as Server-Sent Events.
 *
 * Body: `{ question: string }`. Invalid bodies get a regular 400 JSON response
 * before any event-stream headers are sent.
 *
 * Events emitted:
 * - `status`  `{ message }`          progress updates
 * - `sources` `{ sources }`          matched documents, sent before the answer
 * - `token`   `{ token, isWord }`    answer text, split into word and whitespace
 *                                    tokens; concatenating all tokens in order
 *                                    reproduces the answer
 * - `done`    `{ sources }`          end of answer
 * - `error`   `{ error }`            a step failed after streaming started
 */
app.post("/ask/stream", async (req, res) => {
  const { question } = req.body ?? {};

  // A non-string `question` has no callable .trim(); calling it would throw
  // outside the try block below, so reject such bodies up front.
  if (typeof question !== "string" || !question.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  res.setHeader("Content-Type",  "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection",    "keep-alive");
  res.flushHeaders();

  // Set once the response is finished or the connection is closed, so that
  // nothing more is written and the LLM stream is not consumed for nobody.
  let clientGone = false;
  res.on("close", () => {
    clientGone = true;
  });

  const send = (event, data) => {
    if (clientGone) return;
    res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
  };

  // Emits each non-empty part as a token; whitespace-only parts are flagged
  // with isWord: false.
  const sendTokens = (parts) => {
    for (const part of parts) {
      if (part) {
        send("token", { token: part, isWord: /\S/.test(part) });
      }
    }
  };

  try {
    send("status", { message: "🔍 Searching documents..." });
    const embedding = await createEmbedding(question.trim());

    const results = await searchQdrant(embedding);

    if (!results.length) {
      // Tokenized exactly like LLM output below (words and the whitespace
      // between them), so the two paths produce the same kind of token stream.
      const notFound = "❌ I could not find this information in the uploaded documents.";
      sendTokens(notFound.split(/(\s+)/));
      send("done",   { sources: [] });
      return res.end();
    }

    const sources = results.map(r => ({
      score: +r.score.toFixed(4),
      file:  r.payload?.file,
      page:  r.payload?.page,
      chunk: r.payload?.chunk,
    }));
    send("sources", { sources });

    send("status", { message: "💬 Generating answer..." });

    const context = buildContext(results);
    const stream  = await llm.chat.completions.create({
      model:       CONFIG.azure.deployment,
      temperature: 0,
      max_tokens:  1500,
      stream:      true,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user",   content: `Context:\n${context}\n\nQuestion:\n${question}` },
      ],
    });

    // Buffer to handle tokens that may be split mid-word
    let wordBuffer = "";

    for await (const chunk of stream) {
      // Stop pulling from the model once the client has disconnected.
      // NOTE(review): leaving the loop is expected to abort the upstream
      // request via the SDK's stream iterator — confirm for the installed
      // openai version.
      if (clientGone) break;

      const rawToken = chunk.choices[0]?.delta?.content ?? "";
      if (!rawToken) continue;

      wordBuffer += rawToken;

      // Split on whitespace, keeping the whitespace runs as their own parts so
      // newlines and markdown spacing survive.
      const parts = wordBuffer.split(/(\s+)/);

      // The last part is text after the final whitespace run and may be an
      // incomplete word — hold it back until more text arrives.
      wordBuffer = parts.pop() ?? "";

      sendTokens(parts);
    }

    // Flush any remaining buffered text
    if (wordBuffer) {
      send("token", { token: wordBuffer, isWord: true });
    }

    send("done", { sources });

  } catch (err) {
    console.error("❌ /ask/stream error:", err);
    // Headers are already sent, so failures are reported in-band as an event.
    send("error", { error: err.message });
  }

  res.end();
});

// Fallback for any request that matched no route above: JSON 404.
app.use((_req, res) => {
  const body = { success: false, error: "Not found" };
  res.status(404).json(body);
});

// ─── Start ────────────────────────────────────────────────────────────────────
/**
 * Boots the server: waits for the embedding model to load, then starts
 * listening on the configured port, so the model is ready before any request
 * is accepted.
 *
 * @returns {Promise<void>} resolves once `app.listen` has been called.
 */
async function start() {
  await getEmbedder();

  const { port } = CONFIG;
  app.listen(port, () => {
    console.log(`Server running on port ${port}`);
  });
}

/** Logs a startup failure and terminates the process with exit code 1. */
function exitOnStartupFailure(err) {
  console.error("Fatal startup error:", err);
  process.exit(1);
}

start().catch(exitOnStartupFailure);