@@ -0,0 +1,54 @@
|
||||
const postgre = require('../database/postgre');
|
||||
|
||||
|
||||
const AnsweredQuestions = async (req, res) => {
|
||||
try {
|
||||
|
||||
const user_id = req.user.id;
|
||||
|
||||
const result = await postgre.query(
|
||||
'SELECT * FROM useraskquestion WHERE user_id = $1 And status = $2 ORDER BY id DESC',
|
||||
[user_id, '1']
|
||||
);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: result.rows
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
const UnansweredQuestions = async (req, res) => {
|
||||
try {
|
||||
|
||||
const user_id = req.user.id;
|
||||
|
||||
const result = await postgre.query(
|
||||
'SELECT * FROM useraskquestion WHERE user_id = $1 And status = $2 ORDER BY id DESC',
|
||||
[user_id, '0']
|
||||
);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: result.rows
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
AnsweredQuestions,
|
||||
UnansweredQuestions
|
||||
};
|
||||
@@ -0,0 +1,298 @@
|
||||
require("dotenv").config();
|
||||
const postgre = require('../database/postgre');
|
||||
const express = require("express");
|
||||
const cors = require("cors");
|
||||
const OpenAI = require("openai");
|
||||
const { pipeline } = require("@xenova/transformers");
|
||||
const { QdrantClient } = require("@qdrant/js-client-rest");
|
||||
|
||||
const CONFIG = {
|
||||
azure: {
|
||||
endpoint: process.env.AZURE_OPENAI_ENDPOINT,
|
||||
deployment: process.env.AZURE_DEPLOYMENT,
|
||||
apiVersion: process.env.AZURE_API_VERSION,
|
||||
apiKey: process.env.AZURE_OPENAI_KEY,
|
||||
},
|
||||
qdrant: {
|
||||
url: process.env.QDRANT_URL,
|
||||
collection: process.env.QDRANT_COLLECTION,
|
||||
},
|
||||
search: {
|
||||
topK: 20,
|
||||
minScore: 0.10,
|
||||
maxContextDocs: 10,
|
||||
},
|
||||
port: process.env.PORT || 5000,
|
||||
};
|
||||
|
||||
// ─── Clients ──────────────────────────────────────────────────────────────────
|
||||
const llm = new OpenAI({
|
||||
baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
|
||||
apiKey: CONFIG.azure.apiKey,
|
||||
defaultHeaders: { "api-key": CONFIG.azure.apiKey },
|
||||
defaultQuery: { "api-version": CONFIG.azure.apiVersion },
|
||||
});
|
||||
|
||||
const qdrant = new QdrantClient({
|
||||
url: CONFIG.qdrant.url,
|
||||
checkCompatibility: false,
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
|
||||
let _embedder = null;
|
||||
async function getEmbedder() {
|
||||
if (!_embedder) {
|
||||
console.log("Loading MiniLM model...");
|
||||
_embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
|
||||
console.log("Embedding model ready");
|
||||
}
|
||||
return _embedder;
|
||||
}
|
||||
|
||||
async function createEmbedding(text) {
|
||||
const model = await getEmbedder();
|
||||
const out = await model(text, { pooling: "mean", normalize: true });
|
||||
return Array.from(out.data);
|
||||
}
|
||||
|
||||
// ─── Qdrant search ────────────────────────────────────────────────────────────
|
||||
async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
|
||||
const results = await qdrant.search(CONFIG.qdrant.collection, {
|
||||
vector: embedding,
|
||||
limit: topK,
|
||||
with_payload: true,
|
||||
score_threshold: minScore,
|
||||
});
|
||||
|
||||
console.log(`Qdrant returned ${results.length} results (threshold: ${minScore})`);
|
||||
return results
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, maxContextDocs);
|
||||
}
|
||||
|
||||
// ─── Build LLM context string ─────────────────────────────────────────────────
|
||||
function buildContext(results) {
|
||||
return results
|
||||
.map((item, i) =>
|
||||
`[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
|
||||
)
|
||||
.join("\n\n---\n\n");
|
||||
}
|
||||
|
||||
const SYSTEM_PROMPT = `
|
||||
You are CPM AI Assistant.
|
||||
|
||||
Rules:
|
||||
- Answer only from the provided information.
|
||||
- If the answer is not available, reply exactly:
|
||||
"❌ I could not find this information in the uploaded documents."
|
||||
- Do not make up information.
|
||||
- Do not mention documents, context, or chunks.
|
||||
- Reply in the same language and style as the user's question.
|
||||
- If the user asks in Hindi, answer in Hindi.
|
||||
- If the user asks in Hinglish, answer in Hinglish.
|
||||
- If the user asks in English, answer in English.
|
||||
|
||||
Response Style:
|
||||
- Use simple and easy-to-understand language.
|
||||
- Keep answers short and clear.
|
||||
- Use headings and bullet points when helpful.
|
||||
- Highlight important words in **bold**.
|
||||
|
||||
Format:
|
||||
|
||||
# 📋 Topic
|
||||
|
||||
## 🎯 Summary
|
||||
Short answer in the user's language.
|
||||
|
||||
## ✅ Details
|
||||
- Point 1
|
||||
- Point 2
|
||||
- Point 3
|
||||
|
||||
## ⚠️ Notes
|
||||
- Extra information (if available).
|
||||
`.trim();
|
||||
|
||||
async function askLLM(question, context) {
|
||||
const completion = await llm.chat.completions.create({
|
||||
model: CONFIG.azure.deployment,
|
||||
temperature: 0,
|
||||
max_tokens: 1500,
|
||||
messages: [
|
||||
{ role: "system", content: SYSTEM_PROMPT },
|
||||
{ role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
|
||||
],
|
||||
});
|
||||
|
||||
return completion.choices[0].message.content;
|
||||
}
|
||||
|
||||
const health = async (req, res) => {
|
||||
res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection });
|
||||
}
|
||||
|
||||
const ask = async (req, res) => {
|
||||
|
||||
|
||||
const { question } = req.body ?? {};
|
||||
|
||||
if (!question?.trim()) {
|
||||
return res.status(400).json({ success: false, error: "question is required" });
|
||||
}
|
||||
|
||||
const t0 = Date.now();
|
||||
|
||||
try {
|
||||
const embedding = await createEmbedding(question.trim());
|
||||
const results = await searchQdrant(embedding);
|
||||
|
||||
if (!results.length) {
|
||||
return res.json({
|
||||
success: true,
|
||||
question,
|
||||
answer: "❌ I could not find this information in the uploaded documents.",
|
||||
sources: [],
|
||||
ms: Date.now() - t0,
|
||||
});
|
||||
}
|
||||
|
||||
const context = buildContext(results);
|
||||
const answer = await askLLM(question, context);
|
||||
|
||||
return res.json({
|
||||
success: true,
|
||||
question,
|
||||
answer,
|
||||
sources: results.map(r => ({
|
||||
score: +r.score.toFixed(4),
|
||||
file: r.payload?.file,
|
||||
page: r.payload?.page,
|
||||
chunk: r.payload?.chunk,
|
||||
})),
|
||||
ms: Date.now() - t0,
|
||||
});
|
||||
|
||||
} catch (err) {
|
||||
console.error("❌ /ask error:", err);
|
||||
return res.status(500).json({ success: false, error: err.message });
|
||||
}
|
||||
}
|
||||
const askstream = async (req, res) => {
|
||||
|
||||
const { question } = req.body ?? {};
|
||||
const user_id = req.user.id;
|
||||
console.log("Received question:", user_id);
|
||||
|
||||
if (!question?.trim()) {
|
||||
return res.status(400).json({ success: false, error: "question is required" });
|
||||
}
|
||||
|
||||
|
||||
|
||||
res.setHeader("Content-Type", "text/event-stream");
|
||||
res.setHeader("Cache-Control", "no-cache");
|
||||
res.setHeader("Connection", "keep-alive");
|
||||
res.flushHeaders();
|
||||
|
||||
const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
|
||||
|
||||
try {
|
||||
send("status", { message: "🔍 Searching documents..." });
|
||||
const embedding = await createEmbedding(question.trim());
|
||||
|
||||
const results = await searchQdrant(embedding);
|
||||
|
||||
|
||||
if (!results.length) {
|
||||
send("token", { token: "❌", isWord: true });
|
||||
send("token", { token: "I", isWord: true });
|
||||
send("token", { token: "could", isWord: true });
|
||||
send("token", { token: "not", isWord: true });
|
||||
send("token", { token: "find", isWord: true });
|
||||
send("token", { token: "this", isWord: true });
|
||||
send("token", { token: "information", isWord: true });
|
||||
send("token", { token: "in", isWord: true });
|
||||
send("token", { token: "the", isWord: true });
|
||||
send("token", { token: "uploaded", isWord: true });
|
||||
send("token", { token: "documents.", isWord: true });
|
||||
send("done", { sources: [] });
|
||||
|
||||
const result = await postgre.query(
|
||||
'insert into useraskquestion (user_id, questions) values ($1, $2)',
|
||||
[user_id, question]
|
||||
);
|
||||
|
||||
return res.end();
|
||||
}
|
||||
|
||||
const result = await postgre.query(
|
||||
'insert into useraskquestion (user_id, questions,status) values ($1, $2, $3)',
|
||||
[user_id, question,'1']
|
||||
);
|
||||
|
||||
const sources = results.map(r => ({
|
||||
score: +r.score.toFixed(4),
|
||||
file: r.payload?.file,
|
||||
page: r.payload?.page,
|
||||
chunk: r.payload?.chunk,
|
||||
}));
|
||||
send("sources", { sources });
|
||||
|
||||
send("status", { message: "💬 Generating answer..." });
|
||||
|
||||
const context = buildContext(results);
|
||||
const stream = await llm.chat.completions.create({
|
||||
model: CONFIG.azure.deployment,
|
||||
temperature: 0,
|
||||
max_tokens: 1500,
|
||||
stream: true,
|
||||
messages: [
|
||||
{ role: "system", content: SYSTEM_PROMPT },
|
||||
{ role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
|
||||
],
|
||||
});
|
||||
|
||||
// Buffer to handle tokens that may be split mid-word
|
||||
let wordBuffer = "";
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const rawToken = chunk.choices[0]?.delta?.content ?? "";
|
||||
if (!rawToken) continue;
|
||||
|
||||
wordBuffer += rawToken;
|
||||
|
||||
// Split on whitespace — emit complete words, keep trailing partial
|
||||
// We preserve newlines/markdown as separate tokens so markdown renders correctly
|
||||
const parts = wordBuffer.split(/(\s+)/);
|
||||
|
||||
// Last element might be an incomplete word — buffer it
|
||||
wordBuffer = parts.pop() ?? "";
|
||||
|
||||
for (const part of parts) {
|
||||
if (part) {
|
||||
send("token", { token: part, isWord: /\S/.test(part) });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flush any remaining buffered text
|
||||
if (wordBuffer) {
|
||||
send("token", { token: wordBuffer, isWord: true });
|
||||
}
|
||||
|
||||
send("done", { sources });
|
||||
|
||||
} catch (err) {
|
||||
console.error("❌ /ask/stream error:", err);
|
||||
send("error", { error: err.message });
|
||||
}
|
||||
|
||||
res.end();
|
||||
}
|
||||
|
||||
module.exports = { ask, askstream, health };
|
||||
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
const postgre = require('../database/postgre');
|
||||
const jwt = require('jsonwebtoken');
|
||||
|
||||
const JWT_SECRET = 'secretkey';
|
||||
|
||||
const loginUser = async (req, res) => {
|
||||
try {
|
||||
const { email, password } = req.body;
|
||||
|
||||
if (!email || !password) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
message: 'Email and password are required'
|
||||
});
|
||||
}
|
||||
|
||||
const result = await postgre.query(
|
||||
'SELECT * FROM users WHERE email = $1 AND password = $2',
|
||||
[email, password]
|
||||
);
|
||||
|
||||
if (result.rows.length === 0) {
|
||||
return res.status(401).json({
|
||||
success: false,
|
||||
message: 'Invalid email or password'
|
||||
});
|
||||
}
|
||||
|
||||
const user = result.rows[0];
|
||||
|
||||
const token = jwt.sign(
|
||||
{
|
||||
id: user.id,
|
||||
user: user.user
|
||||
},
|
||||
JWT_SECRET,
|
||||
{ expiresIn: '24h' }
|
||||
);
|
||||
|
||||
res.status(200).json({
|
||||
success: true,
|
||||
message: 'Login successful',
|
||||
token
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
message: error.message
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = { loginUser };
|
||||
@@ -0,0 +1,142 @@
|
||||
const { QdrantClient } = require("@qdrant/js-client-rest");
|
||||
const { pipeline } = require("@xenova/transformers");
|
||||
|
||||
const COLLECTION = process.env.QDRANT_COLLECTION
|
||||
const qdrant = new QdrantClient({
|
||||
url: process.env.QDRANT_URL,
|
||||
checkCompatibility: false,
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
|
||||
const _warn = console.warn;
|
||||
console.warn = (msg, ...args) => {
|
||||
if (typeof msg === "string" && (msg.includes("UnknownErrorException") || msg.includes("TT:"))) return;
|
||||
_warn(msg, ...args);
|
||||
};
|
||||
|
||||
|
||||
let embedder = null;
|
||||
async function getEmbedder() {
|
||||
if (!embedder) {
|
||||
embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
|
||||
}
|
||||
return embedder;
|
||||
}
|
||||
|
||||
async function createEmbedding(text) {
|
||||
const model = await getEmbedder();
|
||||
const output = await model(text, { pooling: "mean", normalize: true });
|
||||
return Array.from(output.data);
|
||||
}
|
||||
|
||||
|
||||
async function ensureCollection() {
|
||||
try {
|
||||
await qdrant.getCollection(COLLECTION);
|
||||
} catch {
|
||||
await qdrant.createCollection(COLLECTION, {
|
||||
vectors: { size: 384, distance: "Cosine" },
|
||||
});
|
||||
console.log(`[QDRANT] Collection '${COLLECTION}' created.`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
function chunkText(text, chunkSize = 1200, overlap = 250) {
|
||||
const chunks = [];
|
||||
text = text.replace(/\s+/g, " ").trim();
|
||||
let start = 0;
|
||||
while (start < text.length) {
|
||||
let end = start + chunkSize;
|
||||
if (end < text.length) {
|
||||
const lastPeriod = text.lastIndexOf(".", end);
|
||||
if (lastPeriod > start) end = lastPeriod + 1;
|
||||
}
|
||||
const chunk = text.slice(start, end).trim();
|
||||
if (chunk.length > 50) chunks.push(chunk);
|
||||
start = end - overlap;
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
|
||||
async function extractAndInsert(buffer, fileName) {
|
||||
const { getDocument } = await import("pdfjs-dist/legacy/build/pdf.mjs");
|
||||
|
||||
const pdf = await getDocument({ data: new Uint8Array(buffer) }).promise;
|
||||
console.log(`[PDF] ${fileName} — ${pdf.numPages} pages`);
|
||||
|
||||
await ensureCollection();
|
||||
|
||||
const BATCH = 50;
|
||||
let batch = [];
|
||||
let chunkIdx = 0;
|
||||
let total = 0;
|
||||
|
||||
for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
|
||||
console.log(`[PAGE] ${pageNum}/${pdf.numPages}`);
|
||||
|
||||
const page = await pdf.getPage(pageNum);
|
||||
const content = await page.getTextContent();
|
||||
const text = content.items.map(i => i.str).join(" ");
|
||||
|
||||
if (!text || text.trim().length === 0) continue;
|
||||
|
||||
const chunks = chunkText(text);
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const vector = await createEmbedding(chunk);
|
||||
|
||||
batch.push({
|
||||
id: Number(`${Date.now()}${chunkIdx}`),
|
||||
vector,
|
||||
payload: {
|
||||
file: fileName,
|
||||
page: pageNum,
|
||||
chunk: chunkIdx,
|
||||
text: chunk,
|
||||
created_at: new Date().toISOString(),
|
||||
},
|
||||
});
|
||||
|
||||
chunkIdx++;
|
||||
|
||||
if (batch.length >= BATCH) {
|
||||
await qdrant.upsert(COLLECTION, { wait: true, points: batch });
|
||||
console.log(`[UPSERT] ${batch.length} vectors`);
|
||||
total += batch.length;
|
||||
batch = [];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (batch.length > 0) {
|
||||
await qdrant.upsert(COLLECTION, { wait: true, points: batch });
|
||||
console.log(`[UPSERT] ${batch.length} vectors (final)`);
|
||||
total += batch.length;
|
||||
}
|
||||
|
||||
return { fileName, pages: pdf.numPages, vectors: total };
|
||||
}
|
||||
|
||||
|
||||
const upload = async (req, res) => {
|
||||
if (!req.file) {
|
||||
return res.status(400).json({ success: false, error: "No file. Use field name 'pdf'." });
|
||||
}
|
||||
|
||||
const t0 = Date.now();
|
||||
console.log(`[UPLOAD] ${req.file.originalname} (${(req.file.size / 1024).toFixed(1)} KB)`);
|
||||
|
||||
try {
|
||||
const result = await extractAndInsert(req.file.buffer, req.file.originalname);
|
||||
return res.json({ success: true, ms: Date.now() - t0, ...result });
|
||||
} catch (err) {
|
||||
console.error("[QDRANT INSERT ERROR]", err.message);
|
||||
return res.status(500).json({ success: false, error: err.message });
|
||||
}
|
||||
};
|
||||
|
||||
module.exports = { upload };
|
||||
Reference in New Issue
Block a user