diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index b0f8c41..10790ff 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -14,9 +14,9 @@ jobs: - name: Copy files to VPS uses: appleboy/scp-action@master with: - host: 172.236.185.26 - username: root - password: LinodeUser#26 + host: ${{ secrets.VPS_HOST }} + username: ${{ secrets.VPS_USER }} + password: ${{ secrets.VPS_PASSWORD }} port: 22 source: "./*" target: "/root/gyanBuddy" @@ -25,9 +25,9 @@ jobs: - name: Execute SSH Commands uses: appleboy/ssh-action@master with: - host: 172.236.185.26 - username: root - password: LinodeUser#26 + host: ${{ secrets.VPS_HOST }} + username: ${{ secrets.VPS_USER }} + password: ${{ secrets.VPS_PASSWORD }} port: 22 script: | cd /root/gyanBuddy diff --git a/.gitignore b/.gitignore index 07e6e47..f189a18 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /node_modules +/.env \ No newline at end of file diff --git a/server.js b/server.js index df1fb62..6b7fa96 100644 --- a/server.js +++ b/server.js @@ -1,654 +1,3 @@ -// // -// require("dotenv").config(); - -// const express = require("express"); -// const cors = require("cors"); -// const OpenAI = require("openai"); - -// const { -// pipeline, -// } = require("@xenova/transformers"); - -// const { -// QdrantClient, -// } = require("@qdrant/js-client-rest"); - -// const app = express(); - -// app.use(cors()); -// app.use(express.json()); - -// // ====================== -// // AZURE OPENAI -// // ====================== - -// const azureEndpoint = -// "https://cpmindiayoda-resource.services.ai.azure.com"; - -// const deploymentName = "gpt-4o-mini"; - -// const apiVersion = -// "2024-08-01-preview"; - -// const llm = new OpenAI({ -// baseURL: -// `${azureEndpoint}/openai/deployments/${deploymentName}`, - -// apiKey: -// process.env.AZURE_OPENAI_KEY, - -// defaultHeaders: { -// "api-key": -// process.env.AZURE_OPENAI_KEY, -// }, - -// defaultQuery: { -// "api-version": -// apiVersion, -// }, -// }); - -// // ====================== -// // QDRANT -// // ====================== - -// const qdrant = new QdrantClient({ -// url: "http://20.40.61.65:6333", -// checkCompatibility: false, -// timeout: 30000, -// }); - -// const COLLECTION_NAME = -// "pdf_rag"; - -// let embedder; - -// // ====================== -// // LOAD EMBEDDING MODEL -// // ====================== - -// async function loadModel() { -// console.log( -// "Loading MiniLM model..." -// ); - -// embedder = await pipeline( -// "feature-extraction", -// "Xenova/all-MiniLM-L6-v2" -// ); - -// console.log( -// "Embedding model loaded" -// ); -// } - -// // ====================== -// // EMBEDDING -// // ====================== - -// async function createEmbedding( -// text -// ) { -// const output = -// await embedder(text, { -// pooling: "mean", -// normalize: true, -// }); - -// return Array.from(output.data); -// } - -// // ====================== -// // HEALTH -// // ====================== - -// app.get("/", (req, res) => { -// res.json({ -// success: true, -// message: -// "Qdrant + Azure GPT RAG Running", -// }); -// }); - -// // ====================== -// // ASK API -// // ====================== - -// app.post( -// "/ask", -// async (req, res) => { -// try { -// const { question } = -// req.body; - -// if (!question) { -// return res -// .status(400) -// .json({ -// success: false, -// error: -// "Question is required", -// }); -// } - -// console.log( -// "Question:", -// question -// ); - -// // ====================== -// // CREATE EMBEDDING -// // ====================== - -// const embedding = -// await createEmbedding( -// question -// ); - -// // ====================== -// // SEARCH QDRANT -// // ====================== - -// const searchResult = -// await qdrant.search( -// COLLECTION_NAME, -// { -// vector: embedding, -// limit: 20, -// } -// ); -// const filteredResults = searchResult.filter( -// item => item.score >= 0.10 -// ); - -// console.log( -// "Results:", -// filteredResults.length, - - -// ); - -// if ( -// !filteredResults.length -// ) { -// return res.json({ -// success: true, -// answer: -// "No relevant information found.", -// sources: [], -// }); -// } - -// // ====================== -// // CONTEXT -// // ====================== - -// const context = -// filteredResults -// .map( -// (item, index) => ` -// Result ${index + 1} - -// File: -// ${item.payload?.file || ""} - -// Content: -// ${item.payload?.text || ""} -// ` -// ) -// .join("\n\n"); - -// // ====================== -// // GPT CALL -// // ====================== - -// const completion = -// await llm.chat.completions.create( -// { -// model: -// deploymentName, - -// temperature: 0, - -// messages: [ -// { -// role: "system", -// content: ` -// You are CPM AI Assistant. - -// Rules: -// - Answer ONLY from the provided context. -// - If information is not found, say: -// "❌ I could not find this information in the uploaded documents." - -// Response Style: -// - Use emojis where appropriate. -// - Use markdown formatting. -// - Use headings. -// - Use bullet points. -// - Make answers professional and easy to read. -// - Highlight important information using **bold** text. -// - Never mention the context or document chunks. - -// Example Format: - -// # 📋 Dress Code Policy - -// ## 🎯 Overview -// Brief summary here. - -// ## ✅ Key Points -// • Point 1 -// • Point 2 -// • Point 3 - -// ## ⚠️ Important Notes -// • Note 1 -// • Note 2 - -// ## 📝 Conclusion -// Short conclusion. -// `, -// }, -// { -// role: "user", -// content: ` -// Context: -// ${context} - -// Question: -// ${question} -// `, -// }, -// ] -// } -// ); - -// const answer = -// completion.choices[0] -// .message.content; - -// return res.json({ -// success: true, - -// question, - -// answer, - -// sources: -// filteredResults.map( -// (item) => ({ -// score: -// item.score, - -// file: -// item.payload -// ?.file, - -// chunk: -// item.payload -// ?.chunk, -// }) -// ), -// }); -// } catch (error) { -// console.error( -// "ERROR:", -// error -// ); - -// return res -// .status(500) -// .json({ -// success: false, -// error: -// error.message, -// }); -// } -// } -// ); - -// // ====================== -// // START SERVER -// // ====================== - -// async function startServer() { -// try { -// await loadModel(); - -// app.listen( -// process.env.PORT || -// 5000, -// () => { -// console.log( -// "Server running on port", -// process.env.PORT || -// 5000 -// ); -// } -// ); -// } catch (error) { -// console.error( -// "Startup Error:", -// error -// ); -// } -// } - -// startServer(); - -// require("dotenv").config(); - -// const express = require("express"); -// const cors = require("cors"); -// const OpenAI = require("openai"); -// const { pipeline } = require("@xenova/transformers"); -// const { QdrantClient } = require("@qdrant/js-client-rest"); - -// // ─── Config ─────────────────────────────────────────────────────────────────── -// const CONFIG = { -// azure: { -// endpoint: process.env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com", -// deployment: process.env.AZURE_DEPLOYMENT || "gpt-4o-mini", -// apiVersion: process.env.AZURE_API_VERSION || "2024-08-01-preview", -// apiKey: process.env.AZURE_OPENAI_KEY, -// }, -// qdrant: { -// url: process.env.QDRANT_URL || "http://20.40.61.65:6333", -// collection: process.env.QDRANT_COLLECTION || "pdf_rag", -// }, -// search: { -// topK: 20, -// minScore: 0.10, -// maxContextDocs: 10, -// }, -// port: process.env.PORT || 5000, -// }; - -// // ─── Clients ────────────────────────────────────────────────────────────────── -// const llm = new OpenAI({ -// baseURL: `${CONFIG.azure.endpoint}/openai/deployments/${CONFIG.azure.deployment}`, -// apiKey: CONFIG.azure.apiKey, -// defaultHeaders: { "api-key": CONFIG.azure.apiKey }, -// defaultQuery: { "api-version": CONFIG.azure.apiVersion }, -// }); - -// const qdrant = new QdrantClient({ -// url: CONFIG.qdrant.url, -// checkCompatibility: false, -// timeout: 30000, -// }); - -// // ─── Embedding model (singleton, lazy-init) ─────────────────────────────────── -// let _embedder = null; -// async function getEmbedder() { -// if (!_embedder) { -// console.log("⏳ Loading MiniLM model..."); -// _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2"); -// console.log("✅ Embedding model ready"); -// } -// return _embedder; -// } - -// async function createEmbedding(text) { -// const model = await getEmbedder(); -// const out = await model(text, { pooling: "mean", normalize: true }); -// return Array.from(out.data); -// } - -// // ─── Qdrant search ──────────────────────────────────────────────────────────── -// async function searchQdrant(embedding, { topK, minScore, maxContextDocs } = CONFIG.search) { -// const results = await qdrant.search(CONFIG.qdrant.collection, { -// vector: embedding, -// limit: topK, -// with_payload: true, -// score_threshold: minScore, // let Qdrant filter — faster than client-side -// }); - -// // Re-rank by score, cap to maxContextDocs -// return results -// .sort((a, b) => b.score - a.score) -// .slice(0, maxContextDocs); -// } - -// // ─── Build LLM context string ───────────────────────────────────────────────── -// function buildContext(results) { -// return results -// .map((item, i) => -// `[${i + 1}] File: ${item.payload?.file ?? "unknown"} | Page: ${item.payload?.page ?? "?"}\n${item.payload?.text ?? ""}` -// ) -// .join("\n\n---\n\n"); -// } - -// // ─── LLM call ───────────────────────────────────────────────────────────────── -// const SYSTEM_PROMPT = ` -// You are CPM AI Assistant. - -// Rules: -// - Answer only from the provided information. -// - If the answer is not available, reply exactly: -// "❌ I could not find this information in the uploaded documents." -// - Do not make up information. -// - Do not mention documents, context, or chunks. - -// Response Style: -// - Use simple English. -// - Keep answers short and clear. -// - Use headings and bullet points. -// - Highlight important words in **bold**. -// - Use emojis in headings. - -// Format: - -// # 📋 Topic - -// ## 🎯 Summary -// Short answer in 1-2 sentences. - -// ## ✅ Details -// - Point 1 -// - Point 2 -// - Point 3 - -// ## ⚠️ Notes -// - Extra information (if available). -// `.trim(); - -// async function askLLM(question, context) { -// const completion = await llm.chat.completions.create({ -// model: CONFIG.azure.deployment, -// temperature: 0, -// max_tokens: 1500, -// messages: [ -// { role: "system", content: SYSTEM_PROMPT }, -// { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` }, -// ], -// }); - -// return completion.choices[0].message.content; -// } - -// // ─── Express app ────────────────────────────────────────────────────────────── -// const app = express(); -// app.use(cors()); -// app.use(express.json({ limit: "1mb" })); - -// // Request logger middleware -// app.use((req, _res, next) => { -// console.log(`→ ${req.method} ${req.path}`); -// next(); -// }); - -// // ─── Routes ─────────────────────────────────────────────────────────────────── -// app.get("/health", (_req, res) => { -// res.json({ status: "ok", model: CONFIG.azure.deployment, collection: CONFIG.qdrant.collection }); -// }); - -// app.post("/ask", async (req, res) => { -// const { question } = req.body ?? {}; - -// if (!question?.trim()) { -// return res.status(400).json({ success: false, error: "question is required" }); -// } - -// const t0 = Date.now(); - -// try { -// // 1. Embed question -// const embedding = await createEmbedding(question.trim()); - -// // 2. Semantic search -// const results = await searchQdrant(embedding); - -// if (!results.length) { -// return res.json({ -// success: true, -// question, -// answer: "❌ I could not find this information in the uploaded documents.", -// sources: [], -// ms: Date.now() - t0, -// }); -// } - -// // 3. Build context + call LLM -// const context = buildContext(results); -// const answer = await askLLM(question, context); - -// return res.json({ -// success: true, -// question, -// answer, -// sources: results.map(r => ({ -// score: +r.score.toFixed(4), -// file: r.payload?.file, -// page: r.payload?.page, -// chunk: r.payload?.chunk, -// })), -// ms: Date.now() - t0, -// }); - -// } catch (err) { -// console.error("❌ /ask error:", err); -// return res.status(500).json({ success: false, error: err.message }); -// } -// }); - -// app.post("/ask/stream", async (req, res) => { -// const { question } = req.body ?? {}; - -// if (!question?.trim()) { -// return res.status(400).json({ success: false, error: "question is required" }); -// } - -// // ── SSE headers ──────────────────────────────────────────────────────────── -// res.setHeader("Content-Type", "text/event-stream"); -// res.setHeader("Cache-Control", "no-cache"); -// res.setHeader("Connection", "keep-alive"); -// res.flushHeaders(); // send headers immediately - -// const send = (event, data) => res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`); - -// try { -// // 1. Embed -// send("status", { message: "🔍 Searching documents..." }); -// const embedding = await createEmbedding(question.trim()); - -// // 2. Search Qdrant -// const results = await searchQdrant(embedding); - -// if (!results.length) { -// send("token", { token: "❌ I could not find this information in the uploaded documents." }); -// send("done", { sources: [] }); -// return res.end(); -// } - -// // 3. Send sources early so UI can show them while streaming answer -// const sources = results.map(r => ({ -// score: +r.score.toFixed(4), -// file: r.payload?.file, -// page: r.payload?.page, -// chunk: r.payload?.chunk, -// })); -// send("sources", { sources }); - -// // 4. Stream LLM tokens -// send("status", { message: "💬 Generating answer..." }); - -// const context = buildContext(results); -// const stream = await llm.chat.completions.create({ -// model: CONFIG.azure.deployment, -// temperature: 0, -// max_tokens: 1500, -// stream: true, // ← key change -// messages: [ -// { role: "system", content: SYSTEM_PROMPT }, -// { role: "user", content: `Context:\n${context}\n\nQuestion:\n${question}` }, -// ], -// }); - -// for await (const chunk of stream) { -// const token = chunk.choices[0]?.delta?.content ?? ""; -// if (token) send("token", { token }); -// } - -// send("done", { sources }); - -// } catch (err) { -// console.error("❌ /ask/stream error:", err); -// send("error", { error: err.message }); -// } - -// res.end(); -// }); - - -// app.use((_req, res) => res.status(404).json({ success: false, error: "Not found" })); - -// // ─── Start ──────────────────────────────────────────────────────────────────── -// async function start() { -// await getEmbedder(); - -// app.listen(CONFIG.port, () => { -// console.log(`Server running on port ${CONFIG.port}`); -// }); -// } - -// start().catch(err => { -// console.error("Fatal startup error:", err); -// process.exit(1); -// }); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - require("dotenv").config(); const express = require("express"); @@ -657,8 +6,6 @@ const OpenAI = require("openai"); const { pipeline } = require("@xenova/transformers"); const { QdrantClient } = require("@qdrant/js-client-rest"); - -// ─── Config ─────────────────────────────────────────────────────────────────── const CONFIG = { azure: { endpoint: process.env.AZURE_OPENAI_ENDPOINT || "https://cpmindiayoda-resource.services.ai.azure.com", @@ -696,9 +43,9 @@ const qdrant = new QdrantClient({ let _embedder = null; async function getEmbedder() { if (!_embedder) { - console.log("⏳ Loading MiniLM model..."); + console.log("Loading MiniLM model..."); _embedder = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2"); - console.log("✅ Embedding model ready"); + console.log("Embedding model ready"); } return _embedder; } @@ -841,10 +188,6 @@ app.post("/ask", async (req, res) => { } }); -// ─── /ask/stream — word-by-word SSE ────────────────────────────────────────── -// The LLM streams tokens (which may be partial words or multi-word chunks). -// We split every incoming token on whitespace and emit each word as a separate -// SSE "token" event so the frontend can animate them one-by-one. app.post("/ask/stream", async (req, res) => { const { question } = req.body ?? {};