mirror of
https://github.com/kunkunsh/kunkun-ext-rag.git
synced 2025-04-04 02:16:41 +00:00
feat: RAG chat implemented
This commit is contained in:
parent
90ef28eb4a
commit
dd29bd2650
1
.github/workflows/jsr-publish.yml
vendored
1
.github/workflows/jsr-publish.yml
vendored
@ -22,4 +22,5 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bun install
|
bun install
|
||||||
bun run build
|
bun run build
|
||||||
|
bunx kksh@latest verify --publish
|
||||||
bunx jsr publish --allow-slow-types
|
bunx jsr publish --allow-slow-types
|
||||||
|
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"deno.enable": true
|
||||||
|
}
|
26
README.md
26
README.md
@ -3,3 +3,29 @@
|
|||||||
RAG means Retrieval-Augmented Generation.
|
RAG means Retrieval-Augmented Generation.
|
||||||
|
|
||||||
This extension is a local RAG app, that allows you to index a local directory of files and search them using a LLM model.
|
This extension is a local RAG app, that allows you to index a local directory of files and search them using a LLM model.
|
||||||
|
|
||||||
|
If you don't know what RAG is, see [Wikipedia: RAG](https://en.wikipedia.org/wiki/Retrieval-augmented_generation)
|
||||||
|
|
||||||
|
Basically, this extension allows you to index local files and directories and search them using an LLM.
|
||||||
|
|
||||||
|
For now, only text files and pdf files are supported.
|
||||||
|
|
||||||
|
The following file extensions are supported for `Add Files`:
|
||||||
|
|
||||||
|
- `.txt`
|
||||||
|
- `.pdf`
|
||||||
|
- `.md`
|
||||||
|
- `.mdx`
|
||||||
|
|
||||||
|
`.pdf` is not supported yet for `Add Directory`.
|
||||||
|
|
||||||
|
> [!CAUTION]
|
||||||
|
> If you want other file extensions to be supported, please open an issue in the repository.
|
||||||
|
> I will add an option that lets users configure additional file extensions if people are using this extension.
|
||||||
|
|
||||||
|
This is to prevent indexing other files you may not want to index, like lock files.
|
||||||
|
|
||||||
|
## Sample Images
|
||||||
|
|
||||||
|

|
||||||
|

|
||||||
|
@ -1,17 +1,22 @@
|
|||||||
import { FaissStore } from '@langchain/community/vectorstores/faiss';
|
import { FaissStore } from '@langchain/community/vectorstores/faiss';
|
||||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
|
||||||
import * as v from 'valibot';
|
import * as v from 'valibot';
|
||||||
import * as path from 'jsr:@std/path';
|
import * as path from 'jsr:@std/path';
|
||||||
import { existsSync, readdirSync } from 'node:fs';
|
import { existsSync, readdirSync } from 'node:fs';
|
||||||
import { Document } from '@langchain/core/documents';
|
import { Document } from '@langchain/core/documents';
|
||||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||||
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
|
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
|
||||||
import { JSONLoader, JSONLinesLoader } from 'langchain/document_loaders/fs/json';
|
|
||||||
import { TextLoader } from 'langchain/document_loaders/fs/text';
|
import { TextLoader } from 'langchain/document_loaders/fs/text';
|
||||||
import { computeSha256FromText } from './crypto.ts';
|
import { computeSha256FromText } from './crypto.ts';
|
||||||
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
||||||
|
import { DenoAPI } from '../src/api.types.ts';
|
||||||
|
import { txtExts } from './constants.ts';
|
||||||
|
import { AIMessageChunk } from '@langchain/core/messages';
|
||||||
|
|
||||||
export const embeddings = new OpenAIEmbeddings({
|
export const embeddings = new OpenAIEmbeddings({
|
||||||
|
// configuration: {
|
||||||
|
// baseURL: 'https://api.deepseek.com'
|
||||||
|
// },
|
||||||
model: 'text-embedding-3-large'
|
model: 'text-embedding-3-large'
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -27,8 +32,8 @@ export async function getDocsFromDirectory(directoryPath: string): Promise<Docum
|
|||||||
});
|
});
|
||||||
|
|
||||||
const loader = new DirectoryLoader(directoryPath, {
|
const loader = new DirectoryLoader(directoryPath, {
|
||||||
'.json': (path) => new JSONLoader(path, '/texts'),
|
// '.json': (path) => new JSONLoader(path, '/texts'),
|
||||||
'.jsonl': (path) => new JSONLinesLoader(path, '/html'),
|
// '.jsonl': (path) => new JSONLinesLoader(path, '/html'),
|
||||||
'.txt': (path) => new TextLoader(path),
|
'.txt': (path) => new TextLoader(path),
|
||||||
'.md': (path) => new TextLoader(path),
|
'.md': (path) => new TextLoader(path),
|
||||||
'.mdx': (path) => new TextLoader(path)
|
'.mdx': (path) => new TextLoader(path)
|
||||||
@ -38,23 +43,22 @@ export async function getDocsFromDirectory(directoryPath: string): Promise<Docum
|
|||||||
return allSplits;
|
return allSplits;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class Bucket {
|
export class Bucket implements DenoAPI {
|
||||||
readonly bucketPath: string;
|
bucketPath: string = '';
|
||||||
readonly faissStorePath: string;
|
faissStorePath: string = '';
|
||||||
readonly metadataPath: string;
|
metadataPath: string = '';
|
||||||
|
bucketDir: string = '';
|
||||||
|
bucketName: string = '';
|
||||||
private _vectorStore: FaissStore | null = null;
|
private _vectorStore: FaissStore | null = null;
|
||||||
filesSha256: Set<string> = new Set();
|
filesSha256: Set<string> = new Set();
|
||||||
|
|
||||||
constructor(
|
async init(bucketDir: string, bucketName: string) {
|
||||||
readonly bucketDir: string,
|
this.bucketDir = bucketDir;
|
||||||
readonly bucketName: string
|
this.bucketName = bucketName;
|
||||||
) {
|
|
||||||
this.bucketPath = path.join(this.bucketDir, this.bucketName);
|
this.bucketPath = path.join(this.bucketDir, this.bucketName);
|
||||||
this.faissStorePath = path.join(this.bucketPath, 'faiss-store');
|
this.faissStorePath = path.join(this.bucketPath, 'faiss-store');
|
||||||
this.metadataPath = path.join(this.bucketPath, 'metadata.json');
|
this.metadataPath = path.join(this.bucketPath, 'metadata.json');
|
||||||
}
|
|
||||||
|
|
||||||
async init() {
|
|
||||||
if (!existsSync(this.bucketPath)) {
|
if (!existsSync(this.bucketPath)) {
|
||||||
Deno.mkdirSync(this.bucketPath, { recursive: true });
|
Deno.mkdirSync(this.bucketPath, { recursive: true });
|
||||||
}
|
}
|
||||||
@ -69,9 +73,6 @@ export class Bucket {
|
|||||||
}
|
}
|
||||||
this.updateMetadata();
|
this.updateMetadata();
|
||||||
this._vectorStore = await this.getVectorStore();
|
this._vectorStore = await this.getVectorStore();
|
||||||
// if (this._vectorStore) {
|
|
||||||
// await this._vectorStore.save(this.faissStorePath);
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
updateMetadata() {
|
updateMetadata() {
|
||||||
@ -160,7 +161,7 @@ export class Bucket {
|
|||||||
this.updateSha256(docs);
|
this.updateSha256(docs);
|
||||||
console.error('Updated sha256', this.filesSha256.size);
|
console.error('Updated sha256', this.filesSha256.size);
|
||||||
// await this.addDocuments(fileteredDocs);
|
// await this.addDocuments(fileteredDocs);
|
||||||
return this.vectorStore.addDocuments(fileteredDocs).catch((err) => {
|
await this.vectorStore.addDocuments(fileteredDocs).catch((err) => {
|
||||||
console.error('Error adding documents', err);
|
console.error('Error adding documents', err);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -184,4 +185,70 @@ export class Bucket {
|
|||||||
this.updateSha256(docs);
|
this.updateSha256(docs);
|
||||||
await this.addDocuments(fileteredDocs);
|
await this.addDocuments(fileteredDocs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async retrieve(query: string) {
|
||||||
|
const retriever = this.vectorStore.asRetriever();
|
||||||
|
const docs = await retriever.invoke(query);
|
||||||
|
const docsText = docs.map((d) => d.pageContent).join('');
|
||||||
|
return docsText;
|
||||||
|
}
|
||||||
|
|
||||||
|
async query(question: string) {
|
||||||
|
const docsText = await this.retrieve(question);
|
||||||
|
const systemPrompt = `You are an assistant for question-answering tasks.
|
||||||
|
Use the following pieces of retrieved context to answer the question.
|
||||||
|
If you don't know the answer, just say that you don't know.
|
||||||
|
Use three sentences maximum and keep the answer concise.
|
||||||
|
Context: {context}:`;
|
||||||
|
|
||||||
|
// Populate the system prompt with the retrieved context
|
||||||
|
const systemPromptFmt = systemPrompt.replace('{context}', docsText);
|
||||||
|
|
||||||
|
// Create a model
|
||||||
|
const model = new ChatOpenAI({
|
||||||
|
model: 'gpt-4o',
|
||||||
|
temperature: 0
|
||||||
|
});
|
||||||
|
|
||||||
|
// Generate a response
|
||||||
|
const ans: AIMessageChunk = await model.invoke([
|
||||||
|
{
|
||||||
|
role: 'system',
|
||||||
|
content: systemPromptFmt
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: question
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
return ans.content.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
async indexFiles(files: string[]) {
|
||||||
|
console.error('Indexing files', files);
|
||||||
|
for (const file of files) {
|
||||||
|
if (!existsSync(file)) {
|
||||||
|
throw new Error(`File ${file} does not exist`);
|
||||||
|
}
|
||||||
|
// check if file is directory
|
||||||
|
const stats = Deno.statSync(file);
|
||||||
|
console.error('Indexing file', file, 'stats.isFile', stats.isFile);
|
||||||
|
if (stats.isFile) {
|
||||||
|
const ext = path.extname(file);
|
||||||
|
if (txtExts.includes(ext)) {
|
||||||
|
console.error('Adding text file 1', file);
|
||||||
|
await this.addTextFile(file);
|
||||||
|
console.error('Finished adding text file', file);
|
||||||
|
} else if (ext === '.pdf') {
|
||||||
|
console.error('Adding pdf file', file);
|
||||||
|
await this.addPDF(file);
|
||||||
|
} else {
|
||||||
|
throw new Error(`Unsupported file type: ${ext}`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.error('Adding directory', file);
|
||||||
|
await this.addDirectory(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,77 +0,0 @@
|
|||||||
// import { FaissStore } from '@langchain/community/vectorstores/faiss';
|
|
||||||
// import { Bucket, embeddings, getDocsFromDirectory } from './bucket.ts';
|
|
||||||
// import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
||||||
// import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
|
|
||||||
// import { JSONLoader, JSONLinesLoader } from 'langchain/document_loaders/fs/json';
|
|
||||||
// import { TextLoader } from 'langchain/document_loaders/fs/text';
|
|
||||||
// import { OpenAIEmbeddings } from '@langchain/openai';
|
|
||||||
import { existsSync } from 'node:fs';
|
|
||||||
// import path from 'path';
|
|
||||||
import { txtExts } from './constants.ts';
|
|
||||||
import { Bucket } from './bucket.ts';
|
|
||||||
import path from 'node:path';
|
|
||||||
|
|
||||||
async function indexFiles(bucketName: string, files: string[]): Promise<void> {
|
|
||||||
const bucket = new Bucket('./store', bucketName);
|
|
||||||
// const bucket = new Bucket(extensionSupportPath, bucketName);
|
|
||||||
console.error('bucket path', bucket.bucketPath);
|
|
||||||
console.error('files', files);
|
|
||||||
await bucket.init();
|
|
||||||
for (const file of files) {
|
|
||||||
if (!existsSync(file)) {
|
|
||||||
throw new Error(`File ${file} does not exist`);
|
|
||||||
}
|
|
||||||
console.error('file', file);
|
|
||||||
// check if file is directory
|
|
||||||
const stats = Deno.statSync(file);
|
|
||||||
if (stats.isFile) {
|
|
||||||
const ext = path.extname(file);
|
|
||||||
if (txtExts.includes(ext)) {
|
|
||||||
console.error('Adding text file', file);
|
|
||||||
await bucket.addTextFile(file);
|
|
||||||
console.error('Finished adding text file', file);
|
|
||||||
} else if (ext === '.pdf') {
|
|
||||||
console.error('Adding pdf file', file);
|
|
||||||
await bucket.addPDF(file);
|
|
||||||
} else if (stats.isDirectory) {
|
|
||||||
console.error('Adding directory', file);
|
|
||||||
await bucket.addDirectory(file);
|
|
||||||
} else {
|
|
||||||
throw new Error(`Unsupported file type: ${ext}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
await bucket.save();
|
|
||||||
}
|
|
||||||
indexFiles('Kunkun Docs', ['/Users/hk/Dev/kunkun-docs/src/content/docs/developer/DX.mdx']);
|
|
||||||
|
|
||||||
// const bucket = new Bucket(
|
|
||||||
// '/Users/hk/Dev/kunkun-extension-repos/kunkun-ext-rag/extensions_support',
|
|
||||||
// 'Kunkun Docs'
|
|
||||||
// );
|
|
||||||
// await bucket.init();
|
|
||||||
// const files = ['/Users/hk/Dev/kunkun-docs/src/content/docs/developer/manifest.mdx'];
|
|
||||||
// for (const file of files) {
|
|
||||||
// if (!existsSync(file)) {
|
|
||||||
// throw new Error(`File ${file} does not exist`);
|
|
||||||
// }
|
|
||||||
// console.error('file', file);
|
|
||||||
// // check if file is directory
|
|
||||||
// const stats = Deno.statSync(file);
|
|
||||||
// if (stats.isFile) {
|
|
||||||
// const ext = path.extname(file);
|
|
||||||
// if (txtExts.includes(ext)) {
|
|
||||||
// console.error('Adding text file', file);
|
|
||||||
// await bucket.addTextFile(file);
|
|
||||||
// } else if (ext === '.pdf') {
|
|
||||||
// console.error('Adding pdf file', file);
|
|
||||||
// await bucket.addPDF(file);
|
|
||||||
// } else if (stats.isDirectory) {
|
|
||||||
// console.error('Adding directory', file);
|
|
||||||
// await bucket.addDirectory(file);
|
|
||||||
// } else {
|
|
||||||
// throw new Error(`Unsupported file type: ${ext}`);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// await bucket.save();
|
|
@ -1,48 +1,5 @@
|
|||||||
import { expose } from '@kunkun/api/runtime/deno';
|
import { expose } from '@kunkun/api/runtime/deno';
|
||||||
import type { DenoAPI } from '../src/api.types.ts';
|
import type { DenoAPI } from '../src/api.types.ts';
|
||||||
import { Bucket } from './bucket.ts';
|
import { Bucket } from './bucket.ts';
|
||||||
import { existsSync } from 'node:fs';
|
|
||||||
import path from 'node:path';
|
|
||||||
import { txtExts } from './constants.ts';
|
|
||||||
|
|
||||||
export const extensionSupportPath = Deno.env.get('EXTENSION_SUPPORT');
|
expose(new Bucket() satisfies DenoAPI);
|
||||||
if (!extensionSupportPath) {
|
|
||||||
throw new Error('EXTENSION_SUPPORT is not set');
|
|
||||||
}
|
|
||||||
|
|
||||||
expose({
|
|
||||||
async indexFiles(bucketName: string, files: string[]): Promise<void> {
|
|
||||||
const cwd = Deno.cwd();
|
|
||||||
console.error('cwd', cwd);
|
|
||||||
const bucket = new Bucket(extensionSupportPath, bucketName);
|
|
||||||
// const bucket = new Bucket(extensionSupportPath, bucketName);
|
|
||||||
console.error('bucket path', bucket.bucketPath);
|
|
||||||
console.error('files', files);
|
|
||||||
await bucket.init();
|
|
||||||
for (const file of files) {
|
|
||||||
if (!existsSync(file)) {
|
|
||||||
throw new Error(`File ${file} does not exist`);
|
|
||||||
}
|
|
||||||
console.error('file', file);
|
|
||||||
// check if file is directory
|
|
||||||
const stats = Deno.statSync(file);
|
|
||||||
if (stats.isFile) {
|
|
||||||
const ext = path.extname(file);
|
|
||||||
if (txtExts.includes(ext)) {
|
|
||||||
console.error('Adding text file', file);
|
|
||||||
await bucket.addTextFile(file);
|
|
||||||
console.error('Finished adding text file', file);
|
|
||||||
} else if (ext === '.pdf') {
|
|
||||||
console.error('Adding pdf file', file);
|
|
||||||
await bucket.addPDF(file);
|
|
||||||
} else if (stats.isDirectory) {
|
|
||||||
console.error('Adding directory', file);
|
|
||||||
await bucket.addDirectory(file);
|
|
||||||
} else {
|
|
||||||
throw new Error(`Unsupported file type: ${ext}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
await bucket.save();
|
|
||||||
}
|
|
||||||
} satisfies DenoAPI);
|
|
||||||
|
@ -60,60 +60,60 @@ async function deleteDocuments(vectorStore: FaissStore, ids: string[]) {
|
|||||||
|
|
||||||
const vectorStore = await getVectorStore();
|
const vectorStore = await getVectorStore();
|
||||||
|
|
||||||
// const llm = new ChatOpenAI({
|
const llm = new ChatOpenAI({
|
||||||
// model: "gpt-4o-mini",
|
model: "gpt-4o-mini",
|
||||||
// temperature: 0,
|
temperature: 0,
|
||||||
// });
|
});
|
||||||
|
|
||||||
// // Define prompt for question-answering
|
// Define prompt for question-answering
|
||||||
// const promptTemplate = await pull<ChatPromptTemplate>("rlm/rag-prompt");
|
const promptTemplate = await pull<ChatPromptTemplate>("rlm/rag-prompt");
|
||||||
|
|
||||||
// // Define state for application
|
// Define state for application
|
||||||
// const InputStateAnnotation = Annotation.Root({
|
const InputStateAnnotation = Annotation.Root({
|
||||||
// question: Annotation<string>,
|
question: Annotation<string>,
|
||||||
// });
|
});
|
||||||
|
|
||||||
// const StateAnnotation = Annotation.Root({
|
const StateAnnotation = Annotation.Root({
|
||||||
// question: Annotation<string>,
|
question: Annotation<string>,
|
||||||
// context: Annotation<Document[]>,
|
context: Annotation<Document[]>,
|
||||||
// answer: Annotation<string>,
|
answer: Annotation<string>,
|
||||||
// });
|
});
|
||||||
|
|
||||||
// // Define application steps
|
// Define application steps
|
||||||
// const retrieve = async (state: typeof InputStateAnnotation.State) => {
|
const retrieve = async (state: typeof InputStateAnnotation.State) => {
|
||||||
// const retrievedDocs = await vectorStore.similaritySearch(state.question);
|
const retrievedDocs = await vectorStore.similaritySearch(state.question);
|
||||||
// return { context: retrievedDocs };
|
return { context: retrievedDocs };
|
||||||
// };
|
};
|
||||||
|
|
||||||
// const generate = async (state: typeof StateAnnotation.State) => {
|
const generate = async (state: typeof StateAnnotation.State) => {
|
||||||
// const docsContent = state.context.map((doc) => doc.pageContent).join("\n");
|
const docsContent = state.context.map((doc) => doc.pageContent).join("\n");
|
||||||
// const messages = await promptTemplate.invoke({
|
const messages = await promptTemplate.invoke({
|
||||||
// question: state.question,
|
question: state.question,
|
||||||
// context: docsContent,
|
context: docsContent,
|
||||||
// });
|
});
|
||||||
// const response = await llm.invoke(messages);
|
const response = await llm.invoke(messages);
|
||||||
// return { answer: response.content };
|
return { answer: response.content };
|
||||||
// };
|
};
|
||||||
|
|
||||||
// // Compile application and test
|
// Compile application and test
|
||||||
// const graph = new StateGraph(StateAnnotation)
|
const graph = new StateGraph(StateAnnotation)
|
||||||
// .addNode("retrieve", retrieve)
|
.addNode("retrieve", retrieve)
|
||||||
// .addNode("generate", generate)
|
.addNode("generate", generate)
|
||||||
// .addEdge("__start__", "retrieve")
|
.addEdge("__start__", "retrieve")
|
||||||
// .addEdge("retrieve", "generate")
|
.addEdge("retrieve", "generate")
|
||||||
// .addEdge("generate", "__end__")
|
.addEdge("generate", "__end__")
|
||||||
// .compile();
|
.compile();
|
||||||
|
|
||||||
// let inputs = { question: "What is Task Decomposition?" };
|
let inputs = { question: "What is Task Decomposition?" };
|
||||||
|
|
||||||
// while (true) {
|
while (true) {
|
||||||
// const question = prompt("Enter your question (or 'exit' to quit): ");
|
const question = prompt("Enter your question (or 'exit' to quit): ");
|
||||||
// if (!question || question.toLowerCase() === "exit") {
|
if (!question || question.toLowerCase() === "exit") {
|
||||||
// break;
|
break;
|
||||||
// }
|
}
|
||||||
|
|
||||||
// const result = await graph.invoke({ question });
|
const result = await graph.invoke({ question });
|
||||||
// console.log("\nAnswer:");
|
console.log("\nAnswer:");
|
||||||
// console.log(result.answer);
|
console.log(result.answer);
|
||||||
// console.log("\n-------------------\n");
|
console.log("\n-------------------\n");
|
||||||
// }
|
}
|
||||||
|
7
jsr.json
7
jsr.json
@ -1,19 +1,22 @@
|
|||||||
{
|
{
|
||||||
"name": "@kunkun/kunkun-ext-rag",
|
"name": "@kunkun/kunkun-ext-rag",
|
||||||
"version": "0.0.4",
|
"version": "0.0.5",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"exports": "./mod.ts",
|
"exports": "./mod.ts",
|
||||||
"publish": {
|
"publish": {
|
||||||
"include": ["mod.ts", "deno-src", "build", "LICENSE", "README.md", "package.json"]
|
"include": ["mod.ts", "deno-src", "build", "LICENSE", "README.md", "package.json"]
|
||||||
},
|
},
|
||||||
"imports": {
|
"imports": {
|
||||||
|
"@kunkun/api": "jsr:@kunkun/api@^0.0.52",
|
||||||
"@langchain/community": "npm:@langchain/community@^0.3.22",
|
"@langchain/community": "npm:@langchain/community@^0.3.22",
|
||||||
"@langchain/core": "npm:@langchain/core@^0.3.27",
|
"@langchain/core": "npm:@langchain/core@^0.3.27",
|
||||||
"@langchain/langgraph": "npm:@langchain/langgraph@^0.2.38",
|
"@langchain/langgraph": "npm:@langchain/langgraph@^0.2.38",
|
||||||
"@langchain/openai": "npm:@langchain/openai@^0.3.16",
|
"@langchain/openai": "npm:@langchain/openai@^0.3.16",
|
||||||
"@langchain/textsplitters": "npm:@langchain/textsplitters@^0.1.0",
|
"@langchain/textsplitters": "npm:@langchain/textsplitters@^0.1.0",
|
||||||
|
"pdf-parse": "npm:pdf-parse@^1.1.1",
|
||||||
"@std/assert": "jsr:@std/assert@1",
|
"@std/assert": "jsr:@std/assert@1",
|
||||||
|
"valibot": "jsr:@valibot/valibot@^0.42.1",
|
||||||
"faiss-node": "npm:faiss-node@^0.5.1",
|
"faiss-node": "npm:faiss-node@^0.5.1",
|
||||||
"langchain": "npm:langchain@^0.3.9"
|
"langchain": "npm:langchain@^0.3.12"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
19
package.json
19
package.json
@ -3,7 +3,7 @@
|
|||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"name": "kunkun-ext-rag",
|
"name": "kunkun-ext-rag",
|
||||||
"draft": true,
|
"draft": true,
|
||||||
"version": "0.0.4",
|
"version": "0.0.5",
|
||||||
"private": true,
|
"private": true,
|
||||||
"kunkun": {
|
"kunkun": {
|
||||||
"name": "RAG",
|
"name": "RAG",
|
||||||
@ -62,14 +62,25 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"shell:stdin-write",
|
"shell:stdin-write",
|
||||||
"shell:kill"
|
"shell:kill",
|
||||||
|
{
|
||||||
|
"permission": "open:url",
|
||||||
|
"allow": [
|
||||||
|
{
|
||||||
|
"url": "https://en.wikipedia.org/wiki/Retrieval-augmented_generation"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://github.com/kunkunsh/kunkun-ext-rag"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
],
|
],
|
||||||
"customUiCmds": [
|
"customUiCmds": [
|
||||||
{
|
{
|
||||||
"main": "/",
|
"main": "/",
|
||||||
"dist": "build",
|
"dist": "build",
|
||||||
"devMain": "http://localhost:5173",
|
"devMain": "http://localhost:5173",
|
||||||
"name": "RAG",
|
"name": "Local RAG",
|
||||||
"cmds": []
|
"cmds": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -88,9 +99,11 @@
|
|||||||
"@iconify/svelte": "^4.2.0",
|
"@iconify/svelte": "^4.2.0",
|
||||||
"@kksh/api": "^0.0.55",
|
"@kksh/api": "^0.0.55",
|
||||||
"@kksh/svelte5": "0.1.15",
|
"@kksh/svelte5": "0.1.15",
|
||||||
|
"@langchain/openai": "^0.4.2",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
"lucide-svelte": "^0.474.0",
|
"lucide-svelte": "^0.474.0",
|
||||||
"mode-watcher": "^0.5.1",
|
"mode-watcher": "^0.5.1",
|
||||||
|
"svelte-markdown": "^0.4.1",
|
||||||
"sveltekit-superforms": "^2.23.1",
|
"sveltekit-superforms": "^2.23.1",
|
||||||
"tailwind-merge": "^2.6.0",
|
"tailwind-merge": "^2.6.0",
|
||||||
"tailwind-variants": "^0.3.1",
|
"tailwind-variants": "^0.3.1",
|
||||||
|
@ -1,3 +1,10 @@
|
|||||||
export interface DenoAPI {
|
export interface DenoAPI {
|
||||||
indexFiles(bucketName: string, files: string[]): Promise<void>;
|
init(bucketDir: string, bucketName: string): Promise<void>;
|
||||||
|
addTextFile(filePath: string): Promise<void>;
|
||||||
|
addPDF(filePath: string): Promise<void>;
|
||||||
|
addDirectory(dir: string): Promise<void>;
|
||||||
|
indexFiles(files: string[]): Promise<void>;
|
||||||
|
save(): Promise<void>;
|
||||||
|
retrieve(query: string): Promise<string>;
|
||||||
|
query(query: string): Promise<string>;
|
||||||
}
|
}
|
||||||
|
@ -11,8 +11,9 @@
|
|||||||
<Card.Title>{dbInfo.name}</Card.Title>
|
<Card.Title>{dbInfo.name}</Card.Title>
|
||||||
<Card.Description><strong>AI Provider:</strong> {dbInfo.ai}</Card.Description>
|
<Card.Description><strong>AI Provider:</strong> {dbInfo.ai}</Card.Description>
|
||||||
</Card.Header>
|
</Card.Header>
|
||||||
<Card.Content>
|
<Card.Content class="grid grid-cols-2 gap-2">
|
||||||
<Button
|
<Button
|
||||||
|
class="w-full"
|
||||||
variant="destructive"
|
variant="destructive"
|
||||||
size="lg"
|
size="lg"
|
||||||
onclick={() =>
|
onclick={() =>
|
||||||
@ -24,7 +25,9 @@
|
|||||||
>
|
>
|
||||||
Delete
|
Delete
|
||||||
</Button>
|
</Button>
|
||||||
<a href={`/database/${dbInfo.id}`}><Button>Manage Database</Button></a>
|
<a class="w-full" href={`/database/${dbInfo.id}`}>
|
||||||
|
<Button size="lg" class="w-full">Use Database</Button>
|
||||||
|
</a>
|
||||||
</Card.Content>
|
</Card.Content>
|
||||||
</Card.Root>
|
</Card.Root>
|
||||||
{/snippet}
|
{/snippet}
|
||||||
|
16
src/lib/components/TauriLink.svelte
Normal file
16
src/lib/components/TauriLink.svelte
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { open } from '@kksh/api/ui/iframe';
|
||||||
|
|
||||||
|
let { href, children } = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<a
|
||||||
|
class="text-blue-500 hover:underline"
|
||||||
|
{href}
|
||||||
|
onclick={(e) => {
|
||||||
|
e.preventDefault();
|
||||||
|
open.url(href);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{@render children()}
|
||||||
|
</a>
|
@ -1,8 +1,7 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { ui } from '@kksh/api/ui/iframe';
|
import { ui } from '@kksh/api/ui/iframe';
|
||||||
import { Sidebar } from '@kksh/svelte5';
|
import { Sidebar } from '@kksh/svelte5';
|
||||||
import Icon from '@iconify/svelte';
|
import { DatabaseIcon, InfoIcon } from 'lucide-svelte';
|
||||||
import { BotIcon, DatabaseIcon, MessageCircleIcon } from 'lucide-svelte';
|
|
||||||
import { onMount } from 'svelte';
|
import { onMount } from 'svelte';
|
||||||
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
@ -16,9 +15,9 @@
|
|||||||
icon: DatabaseIcon
|
icon: DatabaseIcon
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
title: 'Chat',
|
title: 'About',
|
||||||
url: '/chat',
|
url: '/about',
|
||||||
icon: MessageCircleIcon
|
icon: InfoIcon
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
</script>
|
</script>
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import { fs, shell, path, toast } from '@kksh/api/ui/iframe';
|
import { fs, shell, path, toast } from '@kksh/api/ui/iframe';
|
||||||
import type { DenoAPI } from '../api.types';
|
import type { DenoAPI } from '../api.types.ts';
|
||||||
|
|
||||||
export async function getRpcAPI(env: { OPENAI_API_KEY: string; EXTENSION_SUPPORT: string }) {
|
export async function getRpcAPI(env: { OPENAI_API_KEY: string }) {
|
||||||
await installDenoDeps().catch((err) => {
|
await installDenoDeps().catch((err) => {
|
||||||
return toast.error(`Failed to install deno dependencies; ${err.message}`);
|
return toast.error(`Failed to install deno dependencies; ${err.message}`);
|
||||||
});
|
});
|
||||||
@ -13,7 +13,7 @@ export async function getRpcAPI(env: { OPENAI_API_KEY: string; EXTENSION_SUPPORT
|
|||||||
{
|
{
|
||||||
cwd,
|
cwd,
|
||||||
// allowAllEnv: true,
|
// allowAllEnv: true,
|
||||||
allowEnv: ['OPENAI_API_KEY', 'EXTENSION_SUPPORT', 'CWD'],
|
allowEnv: ['OPENAI_API_KEY', 'CWD'],
|
||||||
allowWrite: ['$EXTENSION_SUPPORT'],
|
allowWrite: ['$EXTENSION_SUPPORT'],
|
||||||
allowAllRead: true,
|
allowAllRead: true,
|
||||||
// allowAllWrite: true,
|
// allowAllWrite: true,
|
||||||
|
@ -1,15 +1,30 @@
|
|||||||
<script>
|
<script>
|
||||||
import { base } from '$app/paths';
|
import { base } from '$app/paths';
|
||||||
|
import TauriLink from '@/components/TauriLink.svelte';
|
||||||
import { Alert, Button, ThemeWrapper } from '@kksh/svelte5';
|
import { Alert, Button, ThemeWrapper } from '@kksh/svelte5';
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<ThemeWrapper>
|
<div class="prose container">
|
||||||
<Alert.Root>
|
<h1 class="text-3xl font-bold">About Page</h1>
|
||||||
<Alert.Title class="text-3xl font-bold">About Page</Alert.Title>
|
<strong>Source Code:</strong>
|
||||||
<Alert.Description>
|
<TauriLink href="https://github.com/kunkunsh/kunkun-ext-rag">
|
||||||
<a href="{base}/">
|
https://github.com/kunkunsh/kunkun-ext-rag
|
||||||
<Button>Home Page</Button>
|
</TauriLink>.
|
||||||
</a>
|
<br />
|
||||||
</Alert.Description>
|
<br />
|
||||||
</Alert.Root>
|
<p>
|
||||||
</ThemeWrapper>
|
Kunkun RAG Extension is a local RAG app, that allows you to index a local directory of files and
|
||||||
|
search them using a LLM model.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
If you don't know what RAG is, you can read more about it
|
||||||
|
<TauriLink href="https://en.wikipedia.org/wiki/Retrieval-augmented_generation">
|
||||||
|
https://en.wikipedia.org/wiki/Retrieval-augmented_generation
|
||||||
|
</TauriLink>.
|
||||||
|
</p>
|
||||||
|
<p>You can add files to a database. Currently only text files are supported.</p>
|
||||||
|
<p>
|
||||||
|
Text Files with extension <code>.txt</code>, <code>.md</code>, <code>.mdx</code> are supported. If
|
||||||
|
you need other file types, send a feature request to the repo.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
@ -1,43 +1,51 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { dialog, path } from '@kksh/api/ui/iframe';
|
import { dialog, path, toast } from '@kksh/api/ui/iframe';
|
||||||
import { dbStore } from '@/stores/db';
|
import { dbStore } from '@/stores/db';
|
||||||
import { Button } from '@kksh/svelte5';
|
import { Button, Input, Popover } from '@kksh/svelte5';
|
||||||
import { getRpcAPI } from '@/deno';
|
import { getRpcAPI } from '@/deno';
|
||||||
|
import { enhance } from '$app/forms';
|
||||||
import { goto } from '$app/navigation';
|
import { goto } from '$app/navigation';
|
||||||
import { toast } from '@kksh/api/headless';
|
import { onDestroy, onMount } from 'svelte';
|
||||||
let { data } = $props();
|
import { InfoIcon, LoaderIcon } from 'lucide-svelte';
|
||||||
|
import SvelteMarkdown from 'svelte-markdown';
|
||||||
|
|
||||||
|
let { data } = $props();
|
||||||
|
let query = $state('');
|
||||||
|
let ans = $state('');
|
||||||
|
let loading = $state(false);
|
||||||
const selectedDb = $dbStore.find((db) => db.id === data.id);
|
const selectedDb = $dbStore.find((db) => db.id === data.id);
|
||||||
if (!selectedDb) {
|
|
||||||
toast.error('Database not found', { description: 'Name: ' + data.id });
|
|
||||||
goto('/');
|
|
||||||
}
|
|
||||||
let rpc: Awaited<ReturnType<typeof getRpcAPI>> | undefined;
|
let rpc: Awaited<ReturnType<typeof getRpcAPI>> | undefined;
|
||||||
|
|
||||||
async function indexFiles(files: string[]) {
|
onMount(async () => {
|
||||||
if (!selectedDb) {
|
if (!selectedDb) {
|
||||||
toast.error('Database not found', { description: 'Name: ' + data.id });
|
toast.error('Database not found', { description: 'Name: ' + data.id });
|
||||||
return goto('/');
|
return goto('/');
|
||||||
}
|
}
|
||||||
|
rpc = await getRpcAPI({
|
||||||
|
OPENAI_API_KEY: selectedDb.apiKey
|
||||||
|
});
|
||||||
|
rpc.command.stderr.on('data', (data) => {
|
||||||
|
console.warn(data);
|
||||||
|
});
|
||||||
|
const extSupportDir = await path.extensionSupportDir();
|
||||||
|
await rpc?.api.init(extSupportDir, selectedDb!.name);
|
||||||
|
});
|
||||||
|
|
||||||
|
onDestroy(async () => {
|
||||||
|
await rpc?.process.kill();
|
||||||
|
});
|
||||||
|
|
||||||
|
async function indexFiles(files: string[]) {
|
||||||
try {
|
try {
|
||||||
const extSupportDir = await path.extensionSupportDir();
|
console.log('Start indexing files', files);
|
||||||
rpc = await getRpcAPI({
|
|
||||||
OPENAI_API_KEY: selectedDb.apiKey,
|
await rpc?.api.indexFiles(files);
|
||||||
EXTENSION_SUPPORT: extSupportDir
|
await rpc?.api.save();
|
||||||
});
|
|
||||||
rpc.command.stderr.on('data', (data) => {
|
|
||||||
console.warn(data);
|
|
||||||
});
|
|
||||||
console.log('Start indexing files');
|
|
||||||
await rpc.api.indexFiles(selectedDb!.name, files);
|
|
||||||
console.log('Finished indexing files');
|
console.log('Finished indexing files');
|
||||||
|
toast.success('Finished indexing files');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error indexing files', error);
|
console.error('Error indexing files', error);
|
||||||
toast.error('Failed to index files');
|
toast.error('Failed to index files');
|
||||||
} finally {
|
|
||||||
setTimeout(async () => {
|
|
||||||
await rpc?.process.kill();
|
|
||||||
}, 2_000);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,7 +73,51 @@
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<h1 class="text-2xl font-bold">Manage Database</h1>
|
<h1 class="text-2xl font-bold">
|
||||||
<Button onclick={addFiles}>Add Files</Button>
|
Manage Database
|
||||||
<Button onclick={addDirectory}>Add Directory</Button>
|
<Popover.Root>
|
||||||
|
<Popover.Trigger>
|
||||||
|
<Button size="icon" variant="ghost">
|
||||||
|
<InfoIcon />
|
||||||
|
</Button>
|
||||||
|
</Popover.Trigger>
|
||||||
|
<Popover.Content>
|
||||||
|
Pick the files or directories you want to index into vector database. Then you can use the
|
||||||
|
database to answer questions.
|
||||||
|
</Popover.Content>
|
||||||
|
</Popover.Root>
|
||||||
|
</h1>
|
||||||
|
<div class="flex gap-2">
|
||||||
|
<Button class="w-full" onclick={addFiles}>Add Files</Button>
|
||||||
|
<Button class="w-full" onclick={addDirectory}>Add Directory</Button>
|
||||||
|
</div>
|
||||||
|
<!--
	Question form.

	Submitting never reaches a server action: the enhance callback cancels the
	native submission and sends the question to the RPC API instead. The answer
	is stored in `ans` and rendered as Markdown; `loading` toggles the spinner.
-->
<form
	method="POST"
	use:enhance={async ({ cancel }) => {
		// Cancel the default POST; the query is handled entirely on the client.
		cancel();
		ans = '';
		// Validate before switching to the loading state, otherwise the early
		// return would leave the spinner showing forever.
		if (query.length === 0) {
			toast.error('Question is required');
			return;
		}
		loading = true;
		try {
			ans = (await rpc?.api.query(query)) ?? '';
			// Only clear the input once the question was answered, so a failed
			// request can be retried without retyping.
			query = '';
		} catch (error) {
			console.error('Error querying database', error);
			toast.error('Failed to answer question');
		} finally {
			// Always leave the loading state, even when the RPC call rejects.
			loading = false;
		}
	}}
>
	<div class="mt-4 flex gap-1">
		<Input name="query" type="text" bind:value={query} placeholder="Question" />
		<Button type="submit">Submit</Button>
	</div>
	{#if loading}
		<div class="flex h-64 items-center justify-center">
			<LoaderIcon class="animate-spin" />
		</div>
	{:else}
		<div class="container mt-4">
			<SvelteMarkdown source={ans} />
		</div>
	{/if}
</form>
|
||||||
</div>
|
</div>
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
import type { PageLoad } from './$types';
|
import type { PageLoad } from './$types';
|
||||||
|
|
||||||
|
export const prerender = false;
|
||||||
|
|
||||||
export const load: PageLoad = ({ params: { id } }) => {
|
export const load: PageLoad = ({ params: { id } }) => {
|
||||||
return { id: parseInt(id) };
|
return { id: parseInt(id) };
|
||||||
};
|
};
|
||||||
|
@ -11,7 +11,9 @@ const config = {
|
|||||||
// adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list.
|
// adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list.
|
||||||
// If your environment is not supported, or you settled on a specific environment, switch out the adapter.
|
// If your environment is not supported, or you settled on a specific environment, switch out the adapter.
|
||||||
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
|
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
|
||||||
adapter: adapter({}),
|
adapter: adapter({
|
||||||
|
fallback: '400.html'
|
||||||
|
}),
|
||||||
alias: {
|
alias: {
|
||||||
'@/*': './src/lib/*'
|
'@/*': './src/lib/*'
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user