first commit

This commit is contained in:
Gitea
2026-06-05 10:25:09 +05:30
commit 0b5715f4b0
10 changed files with 4276 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
AZURE_OPENAI_KEY=o20WLCQfubbGTo0SnkS70lefG0tHvdZlzcUGHOPDmww0igy94Up0JQQJ99CEAC77bzfXJ3w3AAAAACOGIUYb
+1
View File
@@ -0,0 +1 @@
/node_modules
+723
View File
@@ -0,0 +1,723 @@
require("dotenv").config();
const crypto = require("crypto");
const fs = require("fs");
const path = require("path");
// ======================
// SUPPRESS PDF WARNINGS
// ======================
// Keep a handle on the real console.warn so everything that is not filtered
// out below still reaches it unchanged.
const originalWarn = console.warn;

/**
 * Replacement for console.warn that drops warnings whose first argument is a
 * string containing "UnknownErrorException" or "TT:" and forwards every other
 * call, with all of its arguments, to the original console.warn.
 */
console.warn = function filteredWarn(message, ...args) {
  const isSuppressed =
    typeof message === "string" &&
    ["UnknownErrorException", "TT:"].some((marker) => message.includes(marker));
  if (!isSuppressed) {
    originalWarn(message, ...args);
  }
};
// ======================
// PDF.js
// ======================
const pdfjsLib = require(
"pdfjs-dist/legacy/build/pdf.mjs"
);
// ======================
// Transformers
// ======================
const {
pipeline,
} = require("@xenova/transformers");
// ======================
// Qdrant
// ======================
const {
QdrantClient,
} = require("@qdrant/js-client-rest");
// ======================
// QDRANT CONFIG
// ======================
// Connection settings come from the environment (populated by the dotenv call
// at the top of this script) so the indexer writes to the same Qdrant instance
// and collection that the API server reads via QDRANT_URL / QDRANT_COLLECTION.
// The previous hard-coded values are kept as fallbacks, so behaviour is
// unchanged when the variables are not set.
const qdrant = new QdrantClient({
  url: process.env.QDRANT_URL || "http://20.40.61.65:6333",
  checkCompatibility: false,
  timeout: 30000,
});

// Name of the collection every point is upserted into.
const COLLECTION_NAME = process.env.QDRANT_COLLECTION || "pdf_rag";

// Embedding pipeline; assigned by loadModel() and called by createEmbedding().
let embedder;
// ======================
// LOAD MODEL
// ======================
/**
 * Load the feature-extraction pipeline and store it in the module-level
 * `embedder` variable, logging before and after the load.
 *
 * @returns {Promise<void>}
 */
async function loadModel() {
  const task = "feature-extraction";
  const modelId = "Xenova/all-MiniLM-L6-v2";

  console.log("⏳ Loading embedding model...");
  embedder = await pipeline(task, modelId);
  console.log("✅ Embedding model loaded");
}
// ======================
// SMART CHUNKING
// ======================
/**
 * Split text into overlapping chunks, preferring to end a chunk on a period.
 *
 * Whitespace runs are collapsed to single spaces first. Each window starts
 * `overlap` characters before the end of the previous one. Chunks of 50
 * characters or fewer (after trimming) are dropped.
 *
 * Fixes over the previous implementation:
 *  - A period is only used as the chunk end when it lies beyond the overlap
 *    window. Previously a period close to `start` made the next `start` equal
 *    to or smaller than the current one, which looped forever.
 *  - The next window always starts at least one character further on, so the
 *    loop also terminates when `chunkSize <= overlap`.
 *  - The loop stops once a window reaches the end of the text, so no trailing
 *    chunk that is fully contained in the previous chunk is emitted.
 *
 * @param {string} text - Raw text to split; null/undefined is treated as "".
 * @param {number} [chunkSize=800] - Maximum window length in characters.
 * @param {number} [overlap=150] - Characters shared by consecutive windows.
 * @returns {string[]} Trimmed chunks in document order.
 */
function chunkText(text, chunkSize = 800, overlap = 150) {
  const chunks = [];
  const normalized = String(text ?? "").replace(/\s+/g, " ").trim();

  let start = 0;
  while (start < normalized.length) {
    let end = start + chunkSize;

    if (end < normalized.length) {
      const lastPeriod = normalized.lastIndexOf(".", end);
      // Snap to the sentence end only if the following window (which begins
      // at end - overlap) would still start after the current one.
      if (lastPeriod + 1 - overlap > start) {
        end = lastPeriod + 1;
      }
    }

    const chunk = normalized.slice(start, end).trim();
    if (chunk.length > 50) {
      chunks.push(chunk);
    }

    // This window already covered the tail of the text.
    if (end >= normalized.length) {
      break;
    }

    // Guarantee forward progress even for degenerate size/overlap values.
    start = Math.max(end - overlap, start + 1);
  }

  return chunks;
}
// ======================
// CREATE EMBEDDING
// ======================
/**
 * Run the module-level `embedder` on a piece of text with mean pooling and
 * normalization enabled, and return the resulting `data` as a plain array.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Copy of the embedder output's `data` values.
 */
async function createEmbedding(text) {
  const options = { pooling: "mean", normalize: true };
  const { data } = await embedder(text, options);
  return Array.from(data);
}
// ======================
// CREATE COLLECTION
// ======================
/**
 * Make sure the Qdrant collection named by COLLECTION_NAME exists.
 *
 * The collection is looked up first; if that lookup fails for any reason the
 * collection is created with 384-dimensional cosine-distance vectors. Errors
 * from the creation call itself propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function createCollection() {
  let alreadyThere = true;
  try {
    await qdrant.getCollection(COLLECTION_NAME);
  } catch (err) {
    // Any lookup failure is treated as "collection is missing".
    alreadyThere = false;
  }

  if (alreadyThere) {
    console.log("️ Collection already exists");
    return;
  }

  console.log("⏳ Creating collection...");
  const vectors = { size: 384, distance: "Cosine" };
  await qdrant.createCollection(COLLECTION_NAME, { vectors });
  console.log("✅ Collection created");
}
// ======================
// EXTRACT TEXT FROM PDF
// ======================
/**
 * Read a PDF from disk and return the text of all its pages.
 *
 * The text items of each page are joined with single spaces and every page is
 * terminated by a newline. Any failure (reading the file, parsing the PDF,
 * reading a page) is logged and results in an empty string.
 *
 * @param {string} filePath - Path of the PDF file to read.
 * @returns {Promise<string>} Concatenated page text, or "" on error.
 */
async function extractTextFromPDF(filePath) {
  try {
    const bytes = new Uint8Array(fs.readFileSync(filePath));
    const pdf = await pdfjsLib.getDocument({ data: bytes }).promise;

    console.log(`📄 Pages: ${pdf.numPages}`);

    const pageTexts = [];
    let pageNumber = 0;
    // Pages are 1-based and are fetched strictly one after another.
    while (pageNumber < pdf.numPages) {
      pageNumber += 1;
      const page = await pdf.getPage(pageNumber);
      const { items } = await page.getTextContent();
      const line = items.map(({ str }) => str).join(" ");
      pageTexts.push(`${line}\n`);
    }

    return pageTexts.join("");
  } catch (error) {
    console.log("❌ PDF extraction error:", error);
    return "";
  }
}
// ======================
// PROCESS PDF
// ======================
/**
 * Build a deterministic, UUID-formatted point id from a chunk's coordinates.
 *
 * The id is derived from a SHA-256 hash of file name, page and chunk index,
 * so indexing the same file again produces the same ids (the upsert then
 * overwrites instead of duplicating), and ids cannot collide through digit
 * concatenation or lose precision as large numbers.
 *
 * @param {string} fileName - Name of the PDF the chunk came from.
 * @param {number} page - 1-based page number.
 * @param {number} chunkIndex - Running chunk index within the file.
 * @returns {string} Lower-case hyphenated UUID string.
 */
function makePointId(fileName, page, chunkIndex) {
  const hex = crypto
    .createHash("sha256")
    .update(`${fileName}::${page}::${chunkIndex}`)
    .digest("hex");
  // Force the version (5) and variant (8-b) nibbles so the string is a
  // well-formed UUID rather than arbitrary hex in UUID layout.
  const variant = ((Number.parseInt(hex[16], 16) & 0x3) | 0x8).toString(16);
  return [
    hex.slice(0, 8),
    hex.slice(8, 12),
    `5${hex.slice(13, 16)}`,
    `${variant}${hex.slice(17, 20)}`,
    hex.slice(20, 32),
  ].join("-");
}

/**
 * Index one PDF: extract text page by page, chunk it, embed every chunk and
 * upsert the vectors into COLLECTION_NAME in batches of 50.
 *
 * Pages whose text is empty or whitespace-only are skipped. Any error is
 * logged and swallowed so the caller can continue with the next file.
 *
 * Previously point ids were `Number(`${Date.now()}${index}`)`, which exceeds
 * Number.MAX_SAFE_INTEGER once the index has four digits (ids get rounded and
 * collide), is ambiguous by construction, and creates duplicates on every
 * re-run. Ids are now deterministic UUIDs from makePointId().
 *
 * @param {string} filePath - Path of the PDF on disk.
 * @param {string} fileName - File name stored in each point's payload.
 * @returns {Promise<void>}
 */
async function processPDF(filePath, fileName) {
  try {
    const dataBuffer = fs.readFileSync(filePath);
    const pdf = await pdfjsLib
      .getDocument({ data: new Uint8Array(dataBuffer) })
      .promise;

    console.log(`📄 ${fileName} - ${pdf.numPages} pages`);

    const batchSize = 50;
    let batchPoints = [];
    let globalChunkIndex = 0;

    // Upload the pending points and start a fresh batch.
    const uploadBatch = async (label) => {
      console.log(`⬆️ Uploading ${label}${batchPoints.length} vectors`);
      await qdrant.upsert(COLLECTION_NAME, {
        wait: true,
        points: batchPoints,
      });
      batchPoints = [];
    };

    for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
      console.log(`📖 Processing page ${pageNum}/${pdf.numPages}`);

      const page = await pdf.getPage(pageNum);
      const content = await page.getTextContent();
      const pageText = content.items.map((item) => item.str).join(" ");

      if (!pageText || pageText.trim().length === 0) {
        continue;
      }

      const chunks = chunkText(pageText, 1200, 250);

      for (const chunk of chunks) {
        const embedding = await createEmbedding(chunk);

        batchPoints.push({
          id: makePointId(fileName, pageNum, globalChunkIndex),
          vector: embedding,
          payload: {
            file: fileName,
            page: pageNum,
            chunk: globalChunkIndex,
            text: chunk,
            created_at: new Date().toISOString(),
          },
        });
        globalChunkIndex++;

        if (batchPoints.length >= batchSize) {
          await uploadBatch("");
        }
      }
    }

    if (batchPoints.length > 0) {
      await uploadBatch("final ");
    }

    console.log(`${fileName} indexed successfully`);
  } catch (error) {
    console.log(`❌ Error processing ${fileName}:`, error);
  }
}
// ======================
// MAIN
// ======================
/**
 * Entry point of the indexer: load the embedding model, make sure the
 * collection exists, then index every PDF found in the `uploads` folder next
 * to this script, one file at a time.
 *
 * The extension check is case-insensitive so files such as `REPORT.PDF` are
 * indexed too (previously only a lower-case `.pdf` suffix matched).
 * Errors are logged and not rethrown.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    await loadModel();
    await createCollection();

    const folder = path.join(__dirname, "uploads");
    if (!fs.existsSync(folder)) {
      console.log("❌ uploads folder not found");
      return;
    }

    const files = fs
      .readdirSync(folder)
      .filter((file) => file.toLowerCase().endsWith(".pdf"));

    if (files.length === 0) {
      console.log("⚠️ No PDFs found");
      return;
    }

    console.log(`📚 Found ${files.length} PDFs`);

    // Files are processed sequentially; processPDF logs its own failures.
    for (const file of files) {
      const filePath = path.join(folder, file);
      console.log(`\n📄 Processing ${file}`);
      await processPDF(filePath, file);
    }

    console.log("\n🎉 All PDFs indexed successfully");
  } catch (err) {
    console.error("❌ MAIN ERROR:", err);
  }
}

main();
// require("dotenv").config();
// const fs = require("fs");
// const path = require("path");
// const crypto = require("crypto");
// // ─── Suppress PDF warnings ────────────────────────────────────────────────────
// const _warn = console.warn;
// console.warn = (msg, ...a) => {
// if (typeof msg === "string" && (msg.includes("UnknownErrorException") || msg.includes("TT:"))) return;
// _warn(msg, ...a);
// };
// const pdfjsLib = require("pdfjs-dist/legacy/build/pdf.mjs");
// const { pipeline } = require("@xenova/transformers");
// const { QdrantClient } = require("@qdrant/js-client-rest");
// // ─── Config ───────────────────────────────────────────────────────────────────
// const QDRANT_URL = process.env.QDRANT_URL || "http://20.40.61.65:6333";
// const COLLECTION_NAME = "pdf_rag";
// const VECTOR_SIZE = 384;
// const CHUNK_SIZE = 1200;
// const CHUNK_OVERLAP = 250;
// const BATCH_SIZE = 100; // points per upsert call
// const EMBED_CONCURRENCY = 8; // parallel embeddings at once
// const MAX_RETRIES = 3;
// const qdrant = new QdrantClient({ url: QDRANT_URL, checkCompatibility: false, timeout: 60000 });
// let embedder;
// // ─── Semaphore ─────────────────────────────────────────────────────────────────
// class Semaphore {
// constructor(n) { this.n = n; this.queue = []; }
// acquire() {
// return new Promise(res => {
// if (this.n > 0) { this.n--; res(); }
// else this.queue.push(res);
// });
// }
// release() {
// if (this.queue.length) this.queue.shift()();
// else this.n++;
// }
// }
// // ─── Retry helper ─────────────────────────────────────────────────────────────
// async function withRetry(fn, retries = MAX_RETRIES, delay = 500) {
// for (let i = 0; i <= retries; i++) {
// try { return await fn(); }
// catch (err) {
// if (i === retries) throw err;
// console.warn(` ⚠️ Retry ${i + 1}/${retries} after error: ${err.message}`);
// await new Promise(r => setTimeout(r, delay * 2 ** i));
// }
// }
// }
// // ─── Deterministic UUID from content hash ────────────────────────────────────
// // Prevents duplicates if you re-run indexing on the same file
// function makePointId(fileName, page, chunkIndex) {
// const hash = crypto
// .createHash("sha256")
// .update(`${fileName}::${page}::${chunkIndex}`)
// .digest("hex");
// // Qdrant supports UUID strings or unsigned ints; use hex slice as UUID-like string
// return `${hash.slice(0,8)}-${hash.slice(8,12)}-${hash.slice(12,16)}-${hash.slice(16,20)}-${hash.slice(20,32)}`;
// }
// // ─── Chunking ─────────────────────────────────────────────────────────────────
// function chunkText(text, size = CHUNK_SIZE, overlap = CHUNK_OVERLAP) {
// const chunks = [];
// text = text.replace(/\s+/g, " ").trim();
// let start = 0;
// while (start < text.length) {
// let end = start + size;
// if (end < text.length) {
// const last = text.lastIndexOf(".", end);
// if (last > start) end = last + 1;
// }
// const chunk = text.slice(start, end).trim();
// if (chunk.length > 50) chunks.push(chunk);
// start = end - overlap;
// }
// return chunks;
// }
// // ─── Embedding ────────────────────────────────────────────────────────────────
// async function embed(text) {
// const out = await embedder(text, { pooling: "mean", normalize: true });
// return Array.from(out.data);
// }
// // Embed multiple texts with bounded parallelism
// async function embedBatch(texts) {
// const sem = new Semaphore(EMBED_CONCURRENCY);
// return Promise.all(
// texts.map(async (text) => {
// await sem.acquire();
// try { return await embed(text); }
// finally { sem.release(); }
// })
// );
// }
// // ─── Qdrant helpers ───────────────────────────────────────────────────────────
// async function ensureCollection() {
// try {
// await qdrant.getCollection(COLLECTION_NAME);
// console.log("️ Collection already exists");
// } catch {
// console.log("⏳ Creating collection...");
// await qdrant.createCollection(COLLECTION_NAME, {
// vectors: { size: VECTOR_SIZE, distance: "Cosine" },
// // Optimizers: tune for bulk ingest speed, re-enable indexing after
// optimizers_config: { indexing_threshold: 0 },
// });
// console.log("✅ Collection created");
// }
// }
// // Upload a batch with retry
// async function upsertBatch(points) {
// await withRetry(() =>
// qdrant.upsert(COLLECTION_NAME, { wait: true, points })
// );
// }
// // After bulk ingest, re-enable HNSW indexing
// async function enableIndexing() {
// await qdrant.updateCollection(COLLECTION_NAME, {
// optimizers_config: { indexing_threshold: 20000 },
// });
// console.log("🔧 HNSW indexing re-enabled");
// }
// // ─── Check if file already indexed ───────────────────────────────────────────
// async function isFileIndexed(fileName) {
// try {
// const result = await qdrant.scroll(COLLECTION_NAME, {
// filter: { must: [{ key: "file", match: { value: fileName } }] },
// limit: 1,
// with_payload: false,
// with_vector: false,
// });
// return result.points.length > 0;
// } catch { return false; }
// }
// // ─── Process a single PDF ─────────────────────────────────────────────────────
// async function processPDF(filePath, fileName) {
// console.log(`\n📄 ${fileName}`);
// if (await isFileIndexed(fileName)) {
// console.log(` ⏭️ Already indexed — skipping`);
// return;
// }
// const pdf = await pdfjsLib
// .getDocument({ data: new Uint8Array(fs.readFileSync(filePath)) })
// .promise;
// console.log(` 📖 ${pdf.numPages} pages`);
// const allChunks = []; // { text, page, chunkIndex }
// // 1️⃣ Extract all text first (fast, sequential is fine for I/O)
// for (let p = 1; p <= pdf.numPages; p++) {
// const page = await pdf.getPage(p);
// const content = await page.getTextContent();
// const text = content.items.map(i => i.str).join(" ");
// if (!text.trim()) continue;
// const chunks = chunkText(text);
// chunks.forEach((chunk, ci) => allChunks.push({ text: chunk, page: p, chunkIndex: allChunks.length }));
// }
// console.log(` 🧩 ${allChunks.length} chunks — embedding with concurrency=${EMBED_CONCURRENCY}`);
// // 2️⃣ Embed all chunks in parallel (bounded by semaphore)
// const start = Date.now();
// const vectors = await embedBatch(allChunks.map(c => c.text));
// const elapsed = ((Date.now() - start) / 1000).toFixed(1);
// console.log(` ⚡ Embedding done in ${elapsed}s`);
// // 3️⃣ Build points
// const points = allChunks.map((c, i) => ({
// id: makePointId(fileName, c.page, c.chunkIndex),
// vector: vectors[i],
// payload: {
// file: fileName,
// page: c.page,
// chunk: c.chunkIndex,
// text: c.text,
// created_at: new Date().toISOString(),
// },
// }));
// // 4️⃣ Batch upsert with progress
// let uploaded = 0;
// for (let i = 0; i < points.length; i += BATCH_SIZE) {
// const batch = points.slice(i, i + BATCH_SIZE);
// await upsertBatch(batch);
// uploaded += batch.length;
// process.stdout.write(`\r ⬆️ ${uploaded}/${points.length} vectors uploaded`);
// }
// console.log(`\n ✅ ${fileName} indexed`);
// }
// // ─── Main ─────────────────────────────────────────────────────────────────────
// async function main() {
// console.log("⏳ Loading embedding model...");
// embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
// console.log("✅ Model loaded\n");
// await ensureCollection();
// const folder = path.join(__dirname, "uploads");
// if (!fs.existsSync(folder)) return console.log("❌ uploads/ folder not found");
// const pdfs = fs.readdirSync(folder).filter(f => f.endsWith(".pdf"));
// if (!pdfs.length) return console.log("⚠️ No PDFs found");
// console.log(`📚 Found ${pdfs.length} PDF(s)\n`);
// const t0 = Date.now();
// for (const file of pdfs) {
// await processPDF(path.join(folder, file), file);
// }
// await enableIndexing(); // re-enable HNSW after bulk load
// console.log(`\n🎉 Done in ${((Date.now() - t0) / 1000).toFixed(1)}s`);
// }
// main().catch(err => { console.error("❌ Fatal:", err); process.exit(1); });
+2565
View File
File diff suppressed because it is too large Load Diff
+28
View File
@@ -0,0 +1,28 @@
{
"name": "server",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "nodemon server.js"
},
"keywords": [],
"author": "",
"license": "ISC",
"type": "commonjs",
"dependencies": {
"@qdrant/js-client-rest": "^1.18.0",
"@xenova/transformers": "^2.17.2",
"axios": "^1.16.0",
"cors": "^2.8.6",
"dotenv": "^17.4.2",
"express": "^5.2.1",
"multer": "^2.1.1",
"ollama": "^0.6.3",
"openai": "^6.42.0",
"pdf2pic": "^3.2.0",
"pdfjs-dist": "^5.7.284",
"tesseract.js": "^7.0.0"
}
}
+958
View File
@@ -0,0 +1,958 @@
// //
// require("dotenv").config();
// const express = require("express");
// const cors = require("cors");
// const OpenAI = require("openai");
// const {
// pipeline,
// } = require("@xenova/transformers");
// const {
// QdrantClient,
// } = require("@qdrant/js-client-rest");
// const app = express();
// app.use(cors());
// app.use(express.json());
// // ======================
// // AZURE OPENAI
// // ======================
// const azureEndpoint =
// "https://cpmindiayoda-resource.services.ai.azure.com";
// const deploymentName = "gpt-4o-mini";
// const apiVersion =
// "2024-08-01-preview";
// const llm = new OpenAI({
// baseURL:
// `${azureEndpoint}/openai/deployments/${deploymentName}`,
// apiKey:
// process.env.AZURE_OPENAI_KEY,
// defaultHeaders: {
// "api-key":
// process.env.AZURE_OPENAI_KEY,
// },
// defaultQuery: {
// "api-version":
// apiVersion,
// },
// });
// // ======================
// // QDRANT
// // ======================
// const qdrant = new QdrantClient({
// url: "http://20.40.61.65:6333",
// checkCompatibility: false,
// timeout: 30000,
// });
// const COLLECTION_NAME =
// "pdf_rag";
// let embedder;
// // ======================
// // LOAD EMBEDDING MODEL
// // ======================
// async function loadModel() {
// console.log(
// "Loading MiniLM model..."
// );
// embedder = await pipeline(
// "feature-extraction",
// "Xenova/all-MiniLM-L6-v2"
// );
// console.log(
// "Embedding model loaded"
// );
// }
// // ======================
// // EMBEDDING
// // ======================
// async function createEmbedding(
// text
// ) {
// const output =
// await embedder(text, {
// pooling: "mean",
// normalize: true,
// });
// return Array.from(output.data);
// }
// // ======================
// // HEALTH
// // ======================
// app.get("/", (req, res) => {
// res.json({
// success: true,
// message:
// "Qdrant + Azure GPT RAG Running",
// });
// });
// // ======================
// // ASK API
// // ======================
// app.post(
// "/ask",
// async (req, res) => {
// try {
// const { question } =
// req.body;
// if (!question) {
// return res
// .status(400)
// .json({
// success: false,
// error:
// "Question is required",
// });
// }
// console.log(
// "Question:",
// question
// );
// // ======================
// // CREATE EMBEDDING
// // ======================
// const embedding =
// await createEmbedding(
// question
// );
// // ======================
// // SEARCH QDRANT
// // ======================
// const searchResult =
// await qdrant.search(
// COLLECTION_NAME,
// {
// vector: embedding,
// limit: 20,
// }
// );
// const filteredResults = searchResult.filter(
// item => item.score >= 0.10
// );
// console.log(
// "Results:",
// filteredResults.length,
// );
// if (
// !filteredResults.length
// ) {
// return res.json({
// success: true,
// answer:
// "No relevant information found.",
// sources: [],
// });
// }
// // ======================
// // CONTEXT
// // ======================
// const context =
// filteredResults
// .map(
// (item, index) => `
// Result ${index + 1}
// File:
// ${item.payload?.file || ""}
// Content:
// ${item.payload?.text || ""}
// `
// )
// .join("\n\n");
// // ======================
// // GPT CALL
// // ======================
// const completion =
// await llm.chat.completions.create(
// {
// model:
// deploymentName,
// temperature: 0,
// messages: [
// {
// role: "system",
// content: `
// You are CPM AI Assistant.
// Rules:
// - Answer ONLY from the provided context.
// - If information is not found, say:
// "❌ I could not find this information in the uploaded documents."
// Response Style:
// - Use emojis where appropriate.
// - Use markdown formatting.
// - Use headings.
// - Use bullet points.
// - Make answers professional and easy to read.
// - Highlight important information using **bold** text.
// - Never mention the context or document chunks.
// Example Format:
// # 📋 Dress Code Policy
// ## 🎯 Overview
// Brief summary here.
// ## ✅ Key Points
// • Point 1
// • Point 2
// • Point 3
// ## ⚠️ Important Notes
// • Note 1
// • Note 2
// ## 📝 Conclusion
// Short conclusion.
// `,
// },
// {
// role: "user",
// content: `
// Context:
// ${context}
// Question:
// ${question}
// `,
// },
// ]
// }
// );
// const answer =
// completion.choices[0]
// .message.content;
// return res.json({
// success: true,
// question,
// answer,
// sources:
// filteredResults.map(
// (item) => ({
// score:
// item.score,
// file:
// item.payload
// ?.file,
// chunk:
// item.payload
// ?.chunk,
// })
// ),
// });
// } catch (error) {
// console.error(
// "ERROR:",
// error
// );
// return res
// .status(500)
// .json({
// success: false,
// error:
// error.message,
// });
// }
// }
// );
// // ======================
// // START SERVER
// // ======================
// async function startServer() {
// try {
// await loadModel();
// app.listen(
// process.env.PORT ||
// 5000,
// () => {
// console.log(
// "Server running on port",
// process.env.PORT ||
// 5000
// );
// }
// );
// } catch (error) {
// console.error(
// "Startup Error:",
// error
// );
// }
// }
// startServer();
// require("dotenv").config();
// const express = require("express");
// const cors = require("cors");
// const OpenAI = require("openai");
// const { pipeline } = require("@xenova/transformers");
// const { QdrantClient } = require("@qdrant/js-client-rest");
// // ─── Config ───────────────────────────────────────────────────────────────────
// const CONFIG = {
// azure: {
// endpoint: process.env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com",
// deployment: process.env.AZURE_DEPLOYMENT || "gpt-4o-mini",
// apiVersion: process.env.AZURE_API_VERSION || "2024-08-01-preview",
// apiKey: process.env.AZURE_OPENAI_KEY,
// },
// qdrant: {
// url: process.env.QDRANT_URL || "http://20.40.61.65:6333",
// collection: process.env.QDRANT_COLLECTION || "pdf_rag",
// },
// search: {
// topK: 20,
// minScore: 0.10,
// maxContextDocs: 10,
// },
// port: process.env.PORT || 5000,
// };
// // ─── Clients ──────────────────────────────────────────────────────────────────
// const llm = new OpenAI({
// baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`,
// apiKey: CONFIG.azure.apiKey,
// defaultHeaders: { "api-key": CONFIG.azure.apiKey },
// defaultQuery: { "api-version": CONFIG.azure.apiVersion },
// });
// const qdrant = new QdrantClient({
// url: CONFIG.qdrant.url,
// checkCompatibility: false,
// timeout: 30000,
// });
// // ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
// let _embedder = null;
// async function getEmbedder() {
// if (!_embedder) {
// console.log("⏳ Loading MiniLM model...");
// _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
// console.log("✅ Embedding model ready");
// }
// return _embedder;
// }
// async function createEmbedding(text) {
// const model = await getEmbedder();
// const out = await model(text, { pooling: "mean", normalize: true });
// return Array.from(out.data);
// }
// // ─── Qdrant search ────────────────────────────────────────────────────────────
// async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
// const results = await qdrant.search(CONFIG.qdrant.collection, {
// vector: embedding,
// limit: topK,
// with_payload: true,
// score_threshold: minScore, // let Qdrant filter — faster than client-side
// });
// // Re-rank by score, cap to maxContextDocs
// return results
// .sort((a, b) => b.score - a.score)
// .slice(0, maxContextDocs);
// }
// // ─── Build LLM context string ─────────────────────────────────────────────────
// function buildContext(results) {
// return results
// .map((item, i) =>
// `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}`
// )
// .join("\n\n---\n\n");
// }
// // ─── LLM call ─────────────────────────────────────────────────────────────────
// const SYSTEM_PROMPT = `
// You are CPM AI Assistant.
// Rules:
// - Answer only from the provided information.
// - If the answer is not available, reply exactly:
// "❌ I could not find this information in the uploaded documents."
// - Do not make up information.
// - Do not mention documents, context, or chunks.
// Response Style:
// - Use simple English.
// - Keep answers short and clear.
// - Use headings and bullet points.
// - Highlight important words in **bold**.
// - Use emojis in headings.
// Format:
// # 📋 Topic
// ## 🎯 Summary
// Short answer in 1-2 sentences.
// ## ✅ Details
// - Point 1
// - Point 2
// - Point 3
// ## ⚠️ Notes
// - Extra information (if available).
// `.trim();
// async function askLLM(question, context) {
// const completion = await llm.chat.completions.create({
// model: CONFIG.azure.deployment,
// temperature: 0,
// max_tokens: 1500,
// messages: [
// { role: "system", content: SYSTEM_PROMPT },
// { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
// ],
// });
// return completion.choices[0].message.content;
// }
// // ─── Express app ──────────────────────────────────────────────────────────────
// const app = express();
// app.use(cors());
// app.use(express.json({ limit: "1mb" }));
// // Request logger middleware
// app.use((req, _res, next) => {
// console.log(`→ ${req.method} ${req.path}`);
// next();
// });
// // ─── Routes ───────────────────────────────────────────────────────────────────
// app.get("/health", (_req, res) => {
// res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection });
// });
// app.post("/ask", async (req, res) => {
// const { question } = req.body ?? {};
// if (!question?.trim()) {
// return res.status(400).json({ success: false, error: "question is required" });
// }
// const t0 = Date.now();
// try {
// // 1. Embed question
// const embedding = await createEmbedding(question.trim());
// // 2. Semantic search
// const results = await searchQdrant(embedding);
// if (!results.length) {
// return res.json({
// success: true,
// question,
// answer: "❌ I could not find this information in the uploaded documents.",
// sources: [],
// ms: Date.now() - t0,
// });
// }
// // 3. Build context + call LLM
// const context = buildContext(results);
// const answer = await askLLM(question, context);
// return res.json({
// success: true,
// question,
// answer,
// sources: results.map(r => ({
// score: +r.score.toFixed(4),
// file: r.payload?.file,
// page: r.payload?.page,
// chunk: r.payload?.chunk,
// })),
// ms: Date.now() - t0,
// });
// } catch (err) {
// console.error("❌ /ask error:", err);
// return res.status(500).json({ success: false, error: err.message });
// }
// });
// app.post("/ask/stream", async (req, res) => {
// const { question } = req.body ?? {};
// if (!question?.trim()) {
// return res.status(400).json({ success: false, error: "question is required" });
// }
// // ── SSE headers ────────────────────────────────────────────────────────────
// res.setHeader("Content-Type", "text/event-stream");
// res.setHeader("Cache-Control", "no-cache");
// res.setHeader("Connection", "keep-alive");
// res.flushHeaders(); // send headers immediately
// const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
// try {
// // 1. Embed
// send("status", { message: "🔍 Searching documents..." });
// const embedding = await createEmbedding(question.trim());
// // 2. Search Qdrant
// const results = await searchQdrant(embedding);
// if (!results.length) {
// send("token", { token: "❌ I could not find this information in the uploaded documents." });
// send("done", { sources: [] });
// return res.end();
// }
// // 3. Send sources early so UI can show them while streaming answer
// const sources = results.map(r => ({
// score: +r.score.toFixed(4),
// file: r.payload?.file,
// page: r.payload?.page,
// chunk: r.payload?.chunk,
// }));
// send("sources", { sources });
// // 4. Stream LLM tokens
// send("status", { message: "💬 Generating answer..." });
// const context = buildContext(results);
// const stream = await llm.chat.completions.create({
// model: CONFIG.azure.deployment,
// temperature: 0,
// max_tokens: 1500,
// stream: true, // ← key change
// messages: [
// { role: "system", content: SYSTEM_PROMPT },
// { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
// ],
// });
// for await (const chunk of stream) {
// const token = chunk.choices[0]?.delta?.content ?? "";
// if (token) send("token", { token });
// }
// send("done", { sources });
// } catch (err) {
// console.error("❌ /ask/stream error:", err);
// send("error", { error: err.message });
// }
// res.end();
// });
// app.use((_req, res) => res.status(404).json({ success: false, error: "Not found" }));
// // ─── Start ────────────────────────────────────────────────────────────────────
// async function start() {
// await getEmbedder();
// app.listen(CONFIG.port, () => {
// console.log(`Server running on port ${CONFIG.port}`);
// });
// }
// start().catch(err => {
// console.error("Fatal startup error:", err);
// process.exit(1);
// });
require("dotenv").config();
const express = require("express");
const cors = require("cors");
const OpenAI = require("openai");
const { pipeline } = require("@xenova/transformers");
const { QdrantClient } = require("@qdrant/js-client-rest");
// ─── Config ───────────────────────────────────────────────────────────────────
// Runtime configuration. Each environment-backed value falls back to the
// literal default shown here when the variable is unset or empty; the API key
// has no fallback.
const CONFIG = (() => {
  const fromEnv = (name, fallback) => process.env[name] || fallback;

  const azure = {
    endpoint: fromEnv("AZURE_OPENAI_ENDPOINT", "https://cpmindiayoda-resource.services.ai.azure.com"),
    deployment: fromEnv("AZURE_DEPLOYMENT", "gpt-4o-mini"),
    apiVersion: fromEnv("AZURE_API_VERSION", "2024-08-01-preview"),
    apiKey: process.env.AZURE_OPENAI_KEY,
  };

  const qdrant = {
    url: fromEnv("QDRANT_URL", "http://20.40.61.65:6333"),
    collection: fromEnv("QDRANT_COLLECTION", "pdf_rag"),
  };

  // topK: hits requested from Qdrant; minScore: score threshold passed to the
  // search; maxContextDocs: hits kept after sorting.
  const search = {
    topK: 20,
    minScore: 0.10,
    maxContextDocs: 10,
  };

  return {
    azure,
    qdrant,
    search,
    port: fromEnv("PORT", 5000),
  };
})();
// ─── Clients ──────────────────────────────────────────────────────────────────
// OpenAI SDK client pointed at the Azure deployment URL. The key is passed
// both as the SDK apiKey and as an "api-key" header, and the API version is
// appended to every request as a query parameter.
const llm = (() => {
  const { endpoint, deployment, apiKey, apiVersion } = CONFIG.azure;
  return new OpenAI({
    baseURL: `${endpoint}/openai/deployments/${deployment}`,
    apiKey,
    defaultHeaders: { "api-key": apiKey },
    defaultQuery: { "api-version": apiVersion },
  });
})();

// Qdrant REST client used for vector search (30 s request timeout,
// client/server version compatibility check disabled).
const qdrant = new QdrantClient({
  timeout: 30000,
  checkCompatibility: false,
  url: CONFIG.qdrant.url,
});
// ─── Embedding model (singleton, lazy-init) ───────────────────────────────────
// Holds the in-flight or completed model load (a promise), or null when no
// load has been started or the last attempt failed.
let _embedder = null;

/**
 * Return the shared feature-extraction pipeline, loading it on first use.
 *
 * The pending load is cached as a promise, so callers that arrive while the
 * model is still loading share that single load instead of each starting
 * their own (previously the cache was only written after the await). A failed
 * load clears the cache so a later call can retry.
 *
 * @returns {Promise<Function>} Resolves to the loaded pipeline.
 */
async function getEmbedder() {
  if (!_embedder) {
    console.log("⏳ Loading MiniLM model...");
    _embedder = (async () => {
      try {
        const model = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
        console.log("✅ Embedding model ready");
        return model;
      } catch (err) {
        // Do not cache a rejected load.
        _embedder = null;
        throw err;
      }
    })();
  }
  return _embedder;
}
/**
 * Embed a piece of text with the shared pipeline (mean pooling, normalized)
 * and return the output's `data` as a plain array.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Copy of the pipeline output's `data` values.
 */
async function createEmbedding(text) {
  const embed = await getEmbedder();
  const { data } = await embed(text, { pooling: "mean", normalize: true });
  return Array.from(data);
}
// ─── Qdrant search ────────────────────────────────────────────────────────────
/**
 * Search the configured collection for the vectors closest to `embedding`.
 *
 * @param {number[]} embedding - Query vector.
 * @param {{topK: number, minScore: number, maxContextDocs: number}} [options]
 *   Defaults to CONFIG.search. `topK` is the search limit, `minScore` the
 *   score threshold sent to Qdrant, `maxContextDocs` the number of hits kept.
 * @returns {Promise<object[]>} Hits ordered by descending score, capped at
 *   `maxContextDocs`.
 */
async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) {
  const request = {
    vector: embedding,
    limit: topK,
    with_payload: true,
    score_threshold: minScore,
  };
  const hits = await qdrant.search(CONFIG.qdrant.collection, request);

  // Highest score first, then keep only the leading hits.
  hits.sort((left, right) => right.score - left.score);
  return hits.slice(0, maxContextDocs);
}
// ─── Build LLM context string ─────────────────────────────────────────────────
/**
 * Render search hits as the context text sent to the LLM.
 *
 * Every hit becomes a numbered section with a "File | Page" header line
 * followed by the chunk text; sections are separated by a "---" divider.
 * Missing payload fields fall back to "unknown", "?" and "" respectively.
 *
 * @param {object[]} results - Hits whose `payload` may carry file/page/text.
 * @returns {string} Context string ("" for an empty list).
 */
function buildContext(results) {
  const divider = "\n\n---\n\n";
  const sections = results.map((hit, position) => {
    const file = hit.payload?.file ?? "unknown";
    const page = hit.payload?.page ?? "?";
    const text = hit.payload?.text ?? "";
    return `[${position + 1}] File: ${file} | Page: ${page}\n${text}`;
  });
  return sections.join(divider);
}
// ─── LLM call ─────────────────────────────────────────────────────────────────
// System message sent with every chat completion request. The text between
// the backticks is runtime data consumed by the model: it restricts answers
// to the supplied context, fixes the exact "not found" reply, and prescribes
// the markdown layout of the answer. Leading/trailing blank lines are removed
// by the trailing .trim().
const SYSTEM_PROMPT = `
You are CPM AI Assistant.
Rules:
- Answer only from the provided information.
- If the answer is not available, reply exactly:
"❌ I could not find this information in the uploaded documents."
- Do not make up information.
- Do not mention documents, context, or chunks.
Response Style:
- Use simple English.
- Keep answers short and clear.
- Use headings and bullet points.
- Highlight important words in **bold**.
- Use emojis in headings.
Format:
# 📋 Topic
## 🎯 Summary
Short answer in 1-2 sentences.
## ✅ Details
- Point 1
- Point 2
- Point 3
## ⚠️ Notes
- Extra information (if available).
`.trim();
/**
 * Ask the chat model to answer `question` using `context`.
 *
 * Sends SYSTEM_PROMPT as the system message and a user message containing
 * the context followed by the question, with temperature 0 and a 1500-token
 * completion limit.
 *
 * @param {string} question - The user's question.
 * @param {string} context - Context text built from the search hits.
 * @returns {Promise<string>} Content of the first returned choice.
 */
async function askLLM(question, context) {
  const userPrompt = `Context:\n${context}\n\nQuestion:\n${question}`;
  const messages = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: userPrompt },
  ];

  const { choices } = await llm.chat.completions.create({
    model: CONFIG.azure.deployment,
    temperature: 0,
    max_tokens: 1500,
    messages,
  });

  return choices[0].message.content;
}
// ─── Express app ──────────────────────────────────────────────────────────────
// Express application with its global middleware, registered in this order:
// CORS, JSON body parsing (1 MB limit), URL-encoded body parsing, and a
// logger that prints the method and path of every request.
const app = express();
app.use(
  cors(),
  express.json({ limit: "1mb" }),
  express.urlencoded({ extended: true }),
  function logRequest(req, _res, next) {
    console.log(`${req.method} ${req.path}`);
    next();
  }
);
// ─── Routes ───────────────────────────────────────────────────────────────────
// GET /health: reports a static "ok" status together with the configured
// model deployment and Qdrant collection names.
app.get("/health", (_req, res) => {
  const { azure, qdrant: vectorStore } = CONFIG;
  const body = {
    status: "ok",
    model: azure.deployment,
    collection: vectorStore.collection,
  };
  res.json(body);
});
// POST /ask: answer a question from the indexed documents in one response.
//
// Body: { question: string }. Responds 400 when `question` is missing, blank,
// or not a string (previously a non-string value threw on `.trim()` before
// the try block and surfaced as a 500). On success the response carries the
// answer, the hits used as sources, and the elapsed time in milliseconds.
app.post("/ask", async (req, res) => {
  const { question } = req.body ?? {};
  if (typeof question !== "string" || !question.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  const t0 = Date.now();
  try {
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    // Nothing passed the score threshold: answer without calling the LLM.
    if (!results.length) {
      return res.json({
        success: true,
        question,
        answer: "❌ I could not find this information in the uploaded documents.",
        sources: [],
        ms: Date.now() - t0,
      });
    }

    const context = buildContext(results);
    const answer = await askLLM(question, context);

    return res.json({
      success: true,
      question,
      answer,
      sources: results.map((r) => ({
        score: +r.score.toFixed(4),
        file: r.payload?.file,
        page: r.payload?.page,
        chunk: r.payload?.chunk,
      })),
      ms: Date.now() - t0,
    });
  } catch (err) {
    console.error("❌ /ask error:", err);
    return res.status(500).json({ success: false, error: err.message });
  }
});
// ─── /ask/stream — word-by-word SSE ──────────────────────────────────────────
// The LLM streams tokens (which may be partial words or multi-word chunks).
// Incoming text is split on whitespace and every piece is emitted as its own
// SSE "token" event: words with isWord=true, whitespace runs with
// isWord=false, so the client can animate words while keeping the original
// spacing and newlines.
//
// Event sequence: "status" → "sources" → "status" → "token"* → "done", or
// "error" if anything fails after the headers were sent.
//
// Changes over the previous version:
//  - A non-string `question` now yields 400 instead of throwing on `.trim()`.
//  - The "not found" reply goes through the same splitter as streamed text,
//    so it includes the whitespace tokens too (previously only the words were
//    sent, unlike every other answer).
app.post("/ask/stream", async (req, res) => {
  const { question } = req.body ?? {};
  if (typeof question !== "string" || !question.trim()) {
    return res.status(400).json({ success: false, error: "question is required" });
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.flushHeaders();

  const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);

  // Emit each non-empty piece as a "token" event; isWord is false for
  // whitespace-only pieces.
  const sendPieces = (pieces) => {
    for (const piece of pieces) {
      if (piece) {
        send("token", { token: piece, isWord: /\S/.test(piece) });
      }
    }
  };

  try {
    send("status", { message: "🔍 Searching documents..." });
    const embedding = await createEmbedding(question.trim());
    const results = await searchQdrant(embedding);

    if (!results.length) {
      sendPieces("❌ I could not find this information in the uploaded documents.".split(/(\s+)/));
      send("done", { sources: [] });
      return res.end();
    }

    // Sources are sent before the answer so the client can show them early.
    const sources = results.map((r) => ({
      score: +r.score.toFixed(4),
      file: r.payload?.file,
      page: r.payload?.page,
      chunk: r.payload?.chunk,
    }));
    send("sources", { sources });
    send("status", { message: "💬 Generating answer..." });

    const context = buildContext(results);
    const stream = await llm.chat.completions.create({
      model: CONFIG.azure.deployment,
      temperature: 0,
      max_tokens: 1500,
      stream: true,
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` },
      ],
    });

    // Buffer to handle tokens that may be split mid-word.
    let wordBuffer = "";
    for await (const chunk of stream) {
      const rawToken = chunk.choices[0]?.delta?.content ?? "";
      if (!rawToken) continue;

      wordBuffer += rawToken;
      // The capture group keeps the whitespace runs as their own pieces.
      const parts = wordBuffer.split(/(\s+)/);
      // The last piece may be an incomplete word — keep it for the next chunk.
      wordBuffer = parts.pop() ?? "";
      sendPieces(parts);
    }

    // Flush whatever is still buffered once the stream has ended.
    sendPieces([wordBuffer]);
    send("done", { sources });
  } catch (err) {
    console.error("❌ /ask/stream error:", err);
    send("error", { error: err.message });
  }
  res.end();
});
// Fallback for requests that matched no route: reply with a JSON 404.
app.use(function notFound(_req, res) {
  const body = { success: false, error: "Not found" };
  return res.status(404).json(body);
});
// ─── Start ────────────────────────────────────────────────────────────────────
/**
 * Load the embedding model, then start listening on CONFIG.port.
 *
 * The listen callback checks its first argument: Express 5 invokes the
 * callback with the error when the server fails to start (for example when
 * the port is already in use), so without the check a failed bind would be
 * reported as "Server running". Such a failure is logged and the process
 * exits with code 1, matching the handling of other startup errors below.
 *
 * @returns {Promise<void>} Resolves once listen has been requested.
 */
async function start() {
  await getEmbedder();
  app.listen(CONFIG.port, (error) => {
    if (error) {
      console.error("Fatal startup error:", error);
      process.exit(1);
    }
    console.log(`Server running on port ${CONFIG.port}`);
  });
}

// A failure while loading the model is fatal for the server.
start().catch((err) => {
  console.error("Fatal startup error:", err);
  process.exit(1);
});
Binary file not shown.
Binary file not shown.