structure ready

2026-06-15 15:43:09 +05:30
commit 41d82f9266
12 changed files with 6460 additions and 0 deletions
@@ -0,0 +1,54 @@
+const postgre = require('../database/postgre');
+
+
+const AnsweredQuestions = async (req, res) => {
+  try {
+   
+    const user_id = req.user.id; 
+
+    const result = await postgre.query(
+      'SELECT * FROM useraskquestion WHERE user_id = $1 And status = $2 ORDER BY id DESC',
+      [user_id, '1']
+    );
+
+    res.json({
+      success: true,
+      data: result.rows
+    });
+
+  } catch (error) {
+    res.status(500).json({
+      success: false,
+      message: error.message
+    });
+  }
+};
+
+
+const UnansweredQuestions = async (req, res) => {
+  try {
+   
+    const user_id = req.user.id; 
+
+    const result = await postgre.query(
+      'SELECT * FROM useraskquestion WHERE user_id = $1 And status = $2 ORDER BY id DESC',
+      [user_id, '0']
+    );
+
+    res.json({
+      success: true,
+      data: result.rows
+    });
+
+  } catch (error) {
+    res.status(500).json({
+      success: false,
+      message: error.message
+    });
+  }
+};
+
+module.exports = {
+  AnsweredQuestions,
+  UnansweredQuestions
+};
@@ -0,0 +1,298 @@
+require("dotenv").config();
+const postgre = require('../database/postgre');
+const express = require("express");
+const cors = require("cors");
+const OpenAI = require("openai");
+const { pipeline } = require("@xenova/transformers");
+const { QdrantClient } = require("@qdrant/js-client-rest");
+
+const CONFIG = {
+  azure: {
+    endpoint: process.env.AZURE_OPENAI_ENDPOINT,
+    deployment: process.env.AZURE_DEPLOYMENT,
+    apiVersion: process.env.AZURE_API_VERSION,
+    apiKey: process.env.AZURE_OPENAI_KEY,
+  },
+  qdrant: {
+    url: process.env.QDRANT_URL,
+    collection: process.env.QDRANT_COLLECTION,
+  },
+  search: {
+    topK: 20,
+    minScore: 0.10,
+    maxContextDocs: 10,
+  },
+  port: process.env.PORT || 5000,
+};
+
+// ─── Clients ──────────────────────────────────────────────────────────────────
+const llm = new OpenAI({
+  baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
+  apiKey: CONFIG.azure.apiKey,
+  defaultHeaders: { "api-key": CONFIG.azure.apiKey },
+  defaultQuery: { "api-version": CONFIG.azure.apiVersion },
+});
+
+const qdrant = new QdrantClient({
+  url: CONFIG.qdrant.url,
+  checkCompatibility: false,
+  timeout: 30000,
+});
+
+// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
+let _embedder = null;
+async function getEmbedder() {
+  if (!_embedder) {
+    console.log("Loading MiniLM model...");
+    _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
+    console.log("Embedding model ready");
+  }
+  return _embedder;
+}
+
+async function createEmbedding(text) {
+  const model = await getEmbedder();
+  const out = await model(text, { pooling: "mean", normalize: true });
+  return Array.from(out.data);
+}
+
+// ─── Qdrant search ────────────────────────────────────────────────────────────
+async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
+  const results = await qdrant.search(CONFIG.qdrant.collection, {
+    vector: embedding,
+    limit: topK,
+    with_payload: true,
+    score_threshold: minScore,
+  });
+
+  console.log(`Qdrant returned ${results.length} results (threshold: ${minScore})`);
+  return results
+    .sort((a, b) => b.score - a.score)
+    .slice(0, maxContextDocs);
+}
+
+// ─── Build LLM context string ─────────────────────────────────────────────────
+function buildContext(results) {
+  return results
+    .map((item, i) =>
+      `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
+    )
+    .join("\n\n---\n\n");
+}
+
+const SYSTEM_PROMPT = `
+You are CPM AI Assistant.
+
+Rules:
+- Answer only from the provided information.
+- If the answer is not available, reply exactly:
+  "❌ I could not find this information in the uploaded documents."
+- Do not make up information.
+- Do not mention documents, context, or chunks.
+- Reply in the same language and style as the user's question.
+- If the user asks in Hindi, answer in Hindi.
+- If the user asks in Hinglish, answer in Hinglish.
+- If the user asks in English, answer in English.
+
+Response Style:
+- Use simple and easy-to-understand language.
+- Keep answers short and clear.
+- Use headings and bullet points when helpful.
+- Highlight important words in **bold**.
+
+Format:
+
+# 📋 Topic
+
+## 🎯 Summary
+Short answer in the user's language.
+
+## ✅ Details
+- Point 1
+- Point 2
+- Point 3
+
+## ⚠️ Notes
+- Extra information (if available).
+`.trim();
+
+async function askLLM(question, context) {
+  const completion = await llm.chat.completions.create({
+    model: CONFIG.azure.deployment,
+    temperature: 0,
+    max_tokens: 1500,
+    messages: [
+      { role: "system", content: SYSTEM_PROMPT },
+      { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
+    ],
+  });
+
+  return completion.choices[0].message.content;
+}
+
+const health = async (req, res) => {
+  res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection });
+}
+
+const ask = async (req, res) => {
+
+
+  const { question } = req.body ?? {};
+
+  if (!question?.trim()) {
+    return res.status(400).json({ success: false, error: "question is required" });
+  }
+
+  const t0 = Date.now();
+
+  try {
+    const embedding = await createEmbedding(question.trim());
+    const results = await searchQdrant(embedding);
+
+    if (!results.length) {
+      return res.json({
+        success: true,
+        question,
+        answer: "❌ I could not find this information in the uploaded documents.",
+        sources: [],
+        ms: Date.now() - t0,
+      });
+    }
+
+    const context = buildContext(results);
+    const answer = await askLLM(question, context);
+
+    return res.json({
+      success: true,
+      question,
+      answer,
+      sources: results.map(r => ({
+        score: +r.score.toFixed(4),
+        file: r.payload?.file,
+        page: r.payload?.page,
+        chunk: r.payload?.chunk,
+      })),
+      ms: Date.now() - t0,
+    });
+
+  } catch (err) {
+    console.error("❌ /ask error:", err);
+    return res.status(500).json({ success: false, error: err.message });
+  }
+}
+const askstream = async (req, res) => {
+
+  const { question } = req.body ?? {};
+  const user_id = req.user.id;
+  console.log("Received question:", user_id);
+
+  if (!question?.trim()) {
+    return res.status(400).json({ success: false, error: "question is required" });
+  }
+
+
+
+  res.setHeader("Content-Type", "text/event-stream");
+  res.setHeader("Cache-Control", "no-cache");
+  res.setHeader("Connection", "keep-alive");
+  res.flushHeaders();
+
+  const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
+
+  try {
+    send("status", { message: "🔍 Searching documents..." });
+    const embedding = await createEmbedding(question.trim());
+
+    const results = await searchQdrant(embedding);
+
+
+    if (!results.length) { 
+      send("token", { token: "❌", isWord: true });
+      send("token", { token: "I", isWord: true });
+      send("token", { token: "could", isWord: true });
+      send("token", { token: "not", isWord: true });
+      send("token", { token: "find", isWord: true });
+      send("token", { token: "this", isWord: true });
+      send("token", { token: "information", isWord: true });
+      send("token", { token: "in", isWord: true });
+      send("token", { token: "the", isWord: true });
+      send("token", { token: "uploaded", isWord: true });
+      send("token", { token: "documents.", isWord: true });
+      send("done", { sources: [] });
+
+      const result = await postgre.query(
+        'insert into useraskquestion (user_id, questions) values ($1, $2)',
+        [user_id, question]
+      );
+
+      return res.end();
+    }
+
+      const result = await postgre.query(
+        'insert into useraskquestion (user_id, questions,status) values ($1, $2, $3)',
+        [user_id, question,'1']
+      );
+
+    const sources = results.map(r => ({
+      score: +r.score.toFixed(4),
+      file: r.payload?.file,
+      page: r.payload?.page,
+      chunk: r.payload?.chunk,
+    }));
+    send("sources", { sources });
+
+    send("status", { message: "💬 Generating answer..." });
+
+    const context = buildContext(results);
+    const stream = await llm.chat.completions.create({
+      model: CONFIG.azure.deployment,
+      temperature: 0,
+      max_tokens: 1500,
+      stream: true,
+      messages: [
+        { role: "system", content: SYSTEM_PROMPT },
+        { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
+      ],
+    });
+
+    // Buffer to handle tokens that may be split mid-word
+    let wordBuffer = "";
+
+    for await (const chunk of stream) {
+      const rawToken = chunk.choices[0]?.delta?.content ?? "";
+      if (!rawToken) continue;
+
+      wordBuffer += rawToken;
+
+      // Split on whitespace — emit complete words, keep trailing partial
+      // We preserve newlines/markdown as separate tokens so markdown renders correctly
+      const parts = wordBuffer.split(/(\s+)/);
+
+      // Last element might be an incomplete word — buffer it
+      wordBuffer = parts.pop() ?? "";
+
+      for (const part of parts) {
+        if (part) {
+          send("token", { token: part, isWord: /\S/.test(part) });
+        }
+      }
+    }
+
+    // Flush any remaining buffered text
+    if (wordBuffer) {
+      send("token", { token: wordBuffer, isWord: true });
+    }
+
+    send("done", { sources });
+
+  } catch (err) {
+    console.error("❌ /ask/stream error:", err);
+    send("error", { error: err.message });
+  }
+
+  res.end();
+}
+
+module.exports = { ask, askstream, health };
+
+
@@ -0,0 +1,56 @@
+const postgre = require('../database/postgre');
+const jwt = require('jsonwebtoken');
+
+const JWT_SECRET = 'secretkey';
+
+const loginUser = async (req, res) => {
+  try {
+    const { email, password } = req.body;
+
+    if (!email || !password) {
+      return res.status(400).json({
+        success: false,
+        message: 'Email and password are required'
+      });
+    }
+
+    const result = await postgre.query(
+      'SELECT * FROM users WHERE email = $1 AND password = $2',
+      [email, password]
+    );
+
+    if (result.rows.length === 0) {
+      return res.status(401).json({
+        success: false,
+        message: 'Invalid email or password'
+      });
+    }
+
+    const user = result.rows[0];
+
+    const token = jwt.sign(
+      {
+        id: user.id,
+        user: user.user
+      },
+      JWT_SECRET,
+      { expiresIn: '24h' }
+    );
+
+    res.status(200).json({
+      success: true,
+      message: 'Login successful',
+      token
+    });
+
+  } catch (error) {
+    console.error(error);
+
+    res.status(500).json({
+      success: false,
+      message: error.message
+    });
+  }
+};
+
+module.exports = { loginUser };
@@ -0,0 +1,142 @@
+const { QdrantClient } = require("@qdrant/js-client-rest");
+const { pipeline }     = require("@xenova/transformers");
+
+const COLLECTION = process.env.QDRANT_COLLECTION
+const qdrant     = new QdrantClient({
+  url:                process.env.QDRANT_URL,
+  checkCompatibility: false,
+  timeout:            30000,
+});
+
+
+const _warn = console.warn;
+console.warn = (msg, ...args) => {
+  if (typeof msg === "string" && (msg.includes("UnknownErrorException") || msg.includes("TT:"))) return;
+  _warn(msg, ...args);
+};
+
+
+let embedder = null;
+async function getEmbedder() {
+  if (!embedder) {
+    embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
+  }
+  return embedder;
+}
+
+async function createEmbedding(text) {
+  const model  = await getEmbedder();
+  const output = await model(text, { pooling: "mean", normalize: true });
+  return Array.from(output.data);
+}
+
+
+async function ensureCollection() {
+  try {
+    await qdrant.getCollection(COLLECTION);
+  } catch {
+    await qdrant.createCollection(COLLECTION, {
+      vectors: { size: 384, distance: "Cosine" },
+    });
+    console.log(`[QDRANT] Collection '${COLLECTION}' created.`);
+  }
+}
+
+
+function chunkText(text, chunkSize = 1200, overlap = 250) {
+  const chunks = [];
+  text = text.replace(/\s+/g, " ").trim();
+  let start = 0;
+  while (start < text.length) {
+    let end = start + chunkSize;
+    if (end < text.length) {
+      const lastPeriod = text.lastIndexOf(".", end);
+      if (lastPeriod > start) end = lastPeriod + 1;
+    }
+    const chunk = text.slice(start, end).trim();
+    if (chunk.length > 50) chunks.push(chunk);
+    start = end - overlap;
+  }
+  return chunks;
+}
+
+
+async function extractAndInsert(buffer, fileName) {
+  const { getDocument } = await import("pdfjs-dist/legacy/build/pdf.mjs");
+
+  const pdf = await getDocument({ data: new Uint8Array(buffer) }).promise;
+  console.log(`[PDF] ${fileName} — ${pdf.numPages} pages`);
+
+  await ensureCollection();
+
+  const BATCH   = 50;
+  let batch     = [];
+  let chunkIdx  = 0;
+  let total     = 0;
+
+  for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
+    console.log(`[PAGE] ${pageNum}/${pdf.numPages}`);
+
+    const page    = await pdf.getPage(pageNum);
+    const content = await page.getTextContent();
+    const text    = content.items.map(i => i.str).join(" ");
+
+    if (!text || text.trim().length === 0) continue;
+
+    const chunks = chunkText(text);
+
+    for (const chunk of chunks) {
+      const vector = await createEmbedding(chunk);
+
+      batch.push({
+        id:      Number(`${Date.now()}${chunkIdx}`),
+        vector,
+        payload: {
+          file:       fileName,
+          page:       pageNum,
+          chunk:      chunkIdx,
+          text:       chunk,
+          created_at: new Date().toISOString(),
+        },
+      });
+
+      chunkIdx++;
+
+      if (batch.length >= BATCH) {
+        await qdrant.upsert(COLLECTION, { wait: true, points: batch });
+        console.log(`[UPSERT] ${batch.length} vectors`);
+        total += batch.length;
+        batch  = [];
+      }
+    }
+  }
+
+
+  if (batch.length > 0) {
+    await qdrant.upsert(COLLECTION, { wait: true, points: batch });
+    console.log(`[UPSERT] ${batch.length} vectors (final)`);
+    total += batch.length;
+  }
+
+  return { fileName, pages: pdf.numPages, vectors: total };
+}
+
+
+const upload = async (req, res) => {
+  if (!req.file) {
+    return res.status(400).json({ success: false, error: "No file. Use field name 'pdf'." });
+  }
+
+  const t0 = Date.now();
+  console.log(`[UPLOAD] ${req.file.originalname} (${(req.file.size / 1024).toFixed(1)} KB)`);
+
+  try {
+    const result = await extractAndInsert(req.file.buffer, req.file.originalname);
+    return res.json({ success: true, ms: Date.now() - t0, ...result });
+  } catch (err) {
+    console.error("[QDRANT INSERT ERROR]", err.message);
+    return res.status(500).json({ success: false, error: err.message });
+  }
+};
+
+module.exports = { upload };