mirror of
https://github.com/kunkunsh/kunkun-ext-rag.git
synced 2025-04-04 02:16:41 +00:00
feat: add deno rpc integration for the extension
This commit is contained in:
parent
6665dccc2b
commit
19a98d0aaa
@ -1,180 +1,187 @@
|
||||
import { FaissStore } from "@langchain/community/vectorstores/faiss";
|
||||
import { OpenAIEmbeddings } from "@langchain/openai";
|
||||
import * as v from "valibot";
|
||||
import * as path from "jsr:@std/path";
|
||||
import { existsSync } from "node:fs";
|
||||
import { Document } from "@langchain/core/documents";
|
||||
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
||||
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
|
||||
import {
|
||||
JSONLoader,
|
||||
JSONLinesLoader,
|
||||
} from "langchain/document_loaders/fs/json";
|
||||
import { TextLoader } from "langchain/document_loaders/fs/text";
|
||||
import { computeSha256FromText } from "./crypto.ts";
|
||||
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
|
||||
import { FaissStore } from '@langchain/community/vectorstores/faiss';
|
||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
import * as v from 'valibot';
|
||||
import * as path from 'jsr:@std/path';
|
||||
import { existsSync, readdirSync } from 'node:fs';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||
import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
|
||||
import { JSONLoader, JSONLinesLoader } from 'langchain/document_loaders/fs/json';
|
||||
import { TextLoader } from 'langchain/document_loaders/fs/text';
|
||||
import { computeSha256FromText } from './crypto.ts';
|
||||
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
||||
|
||||
/** Shared OpenAI embeddings client; passed to every FAISS vector store created or loaded in this module. */
export const embeddings = new OpenAIEmbeddings({
	model: 'text-embedding-3-large'
});
|
||||
|
||||
/** Schema of a bucket's `metadata.json`: the list of SHA-256 hashes already recorded for the bucket. */
export const MetadataSchema = v.object({
	filesSha256: v.array(v.string())
});
/** Validated in-memory form of `metadata.json`. */
export type Metadata = v.InferOutput<typeof MetadataSchema>;
|
||||
|
||||
/**
 * Loads every supported file under `directoryPath` and splits the loaded documents into chunks.
 *
 * Supported extensions: `.json` (JSON pointer `/texts`), `.jsonl` (JSON pointer `/html`),
 * `.txt`, `.md` and `.mdx`.
 *
 * @param directoryPath Directory handed to `DirectoryLoader`.
 * @returns The loaded documents, split with `chunkSize: 1000` and `chunkOverlap: 200`.
 */
export async function getDocsFromDirectory(directoryPath: string): Promise<Document[]> {
	const splitter = new RecursiveCharacterTextSplitter({
		chunkSize: 1000,
		chunkOverlap: 200
	});

	// The callback parameter is named `filePath` so it does not shadow the imported `path` module.
	const loader = new DirectoryLoader(directoryPath, {
		'.json': (filePath) => new JSONLoader(filePath, '/texts'),
		'.jsonl': (filePath) => new JSONLinesLoader(filePath, '/html'),
		'.txt': (filePath) => new TextLoader(filePath),
		'.md': (filePath) => new TextLoader(filePath),
		'.mdx': (filePath) => new TextLoader(filePath)
	});

	const docs = await loader.load();
	return splitter.splitDocuments(docs);
}
|
||||
|
||||
/**
 * A named, on-disk collection of embedded documents.
 *
 * Files live under `<bucketDir>/<bucketName>/`:
 * - `faiss-store/`  – the serialized FAISS vector store (written by {@link Bucket.save})
 * - `metadata.json` – SHA-256 hashes of document contents already added, used to skip duplicates
 *
 * {@link Bucket.init} must be awaited before any method that touches the vector store;
 * the `vectorStore` getter throws otherwise. Adding documents only changes the in-memory
 * store; call {@link Bucket.save} to persist it.
 */
export class Bucket {
	readonly bucketPath: string;
	readonly faissStorePath: string;
	readonly metadataPath: string;
	private _vectorStore: FaissStore | null = null;
	/** SHA-256 hashes of document contents that were already added to this bucket. */
	filesSha256: Set<string> = new Set();

	/**
	 * @param bucketDir Directory that contains all buckets.
	 * @param bucketName Name of this bucket; used as the sub-directory name.
	 */
	constructor(
		readonly bucketDir: string,
		readonly bucketName: string
	) {
		this.bucketPath = path.join(this.bucketDir, this.bucketName);
		this.faissStorePath = path.join(this.bucketPath, 'faiss-store');
		this.metadataPath = path.join(this.bucketPath, 'metadata.json');
	}

	/**
	 * Creates the bucket directory if needed, loads and validates `metadata.json` when present,
	 * writes the metadata back, and loads (or creates) the vector store.
	 *
	 * @throws Error('Invalid metadata') when `metadata.json` does not match {@link MetadataSchema}.
	 */
	async init() {
		if (!existsSync(this.bucketPath)) {
			Deno.mkdirSync(this.bucketPath, { recursive: true });
		}
		if (existsSync(this.metadataPath)) {
			const raw: unknown = JSON.parse(Deno.readTextFileSync(this.metadataPath));
			const parsed = v.safeParse(MetadataSchema, raw);
			if (!parsed.success) {
				throw new Error('Invalid metadata');
			}
			this.filesSha256 = new Set(parsed.output.filesSha256);
		}
		this.updateMetadata();
		this._vectorStore = await this.getVectorStore();
	}

	/** Writes the current hash set to `metadata.json`. */
	updateMetadata() {
		const metadata: Metadata = {
			filesSha256: Array.from(this.filesSha256)
		};
		Deno.writeTextFileSync(this.metadataPath, JSON.stringify(metadata));
	}

	/**
	 * Loads the FAISS store from `faiss-store/` when that directory and its `docstore.json`
	 * exist; otherwise returns a new, empty store (nothing is written to disk here).
	 */
	async getVectorStore() {
		const docstorePath = path.join(this.faissStorePath, 'docstore.json');
		if (existsSync(this.faissStorePath) && existsSync(docstorePath)) {
			return await FaissStore.load(this.faissStorePath, embeddings);
		}
		return new FaissStore(embeddings, {});
	}

	/**
	 * The initialized vector store.
	 *
	 * @throws Error('Vector store not initialized') when {@link Bucket.init} has not completed.
	 */
	get vectorStore() {
		if (this._vectorStore === null) {
			throw new Error('Vector store not initialized');
		}
		return this._vectorStore;
	}

	/** Embeds `documents` into the in-memory vector store; does not persist (see {@link Bucket.save}). */
	private async addDocuments(documents: Document[]) {
		// Nothing new to embed: skip the call into the vector store entirely.
		if (documents.length === 0) {
			return;
		}
		await this.vectorStore.addDocuments(documents);
	}

	/** Stores the SHA-256 of each document's `pageContent` in `doc.metadata.sha256`. */
	private fillSha256(documents: Document[]) {
		for (const doc of documents) {
			doc.metadata.sha256 = computeSha256FromText(doc.pageContent);
		}
	}

	/** Returns the documents whose `metadata.sha256` is not yet recorded in this bucket. */
	private getFilteredDocs(documents: Document[]) {
		return documents.filter((doc) => !this.filesSha256.has(doc.metadata.sha256));
	}

	/** Records the hashes of `docs` and writes them to `metadata.json`. */
	private updateSha256(docs: Document[]) {
		for (const doc of docs) {
			this.filesSha256.add(doc.metadata.sha256 as string);
		}
		this.updateMetadata();
	}

	/**
	 * Adds every supported file under `directoryPath` (see {@link getDocsFromDirectory}).
	 *
	 * @throws Error('Directory does not exist') when the path is missing.
	 * @throws Error('Path is a file') when the path is a regular file.
	 */
	async addDirectory(directoryPath: string) {
		if (!existsSync(directoryPath)) {
			throw new Error('Directory does not exist');
		}
		// Reject regular files; this method only handles directories.
		const stats = Deno.statSync(directoryPath);
		if (stats.isFile) {
			throw new Error('Path is a file');
		}

		// getDocsFromDirectory already returns chunked documents, so they are not split again here.
		const docs = await getDocsFromDirectory(directoryPath);
		this.fillSha256(docs);
		const filteredDocs = this.getFilteredDocs(docs);
		// Record hashes only after the documents were added, so a failed add can be retried.
		await this.addDocuments(filteredDocs);
		this.updateSha256(filteredDocs);
	}

	/**
	 * Adds a text file (loaded with `TextLoader`) to the bucket, skipping content already recorded.
	 *
	 * Errors from the vector store are logged to stderr and rethrown; hashes are recorded only
	 * after the documents were added successfully.
	 */
	async addTextFile(filePath: string): Promise<void> {
		const docs = await new TextLoader(filePath).load();
		console.error('Loaded docs', docs.length);
		this.fillSha256(docs);
		const filteredDocs = this.getFilteredDocs(docs);
		console.error('Filtered docs', filteredDocs.length);
		try {
			await this.addDocuments(filteredDocs);
		} catch (err) {
			console.error('Error adding documents', err);
			throw err;
		}
		this.updateSha256(filteredDocs);
		console.error('Updated sha256', this.filesSha256.size);
	}

	/**
	 * Persists the vector store to `faiss-store/`.
	 *
	 * @throws Error('No documents to save') when the store holds no documents.
	 */
	async save() {
		const docCount = this.vectorStore.docstore._docs.size;
		console.error('Save Bucket', docCount);
		if (existsSync(this.faissStorePath) && readdirSync(this.faissStorePath).length === 0) {
			// NOTE(review): this removes the whole bucket directory, including metadata.json, when
			// faiss-store/ exists but is empty. Confirm that is intended and that removing only
			// faiss-store/ would not be enough.
			Deno.removeSync(this.bucketPath, { recursive: true });
		}
		if (docCount === 0) {
			throw new Error('No documents to save');
		}
		await this.vectorStore.save(this.faissStorePath);
	}

	/** Adds a PDF (loaded with `PDFLoader`) to the bucket, skipping content already recorded. */
	async addPDF(filePath: string) {
		const docs = await new PDFLoader(filePath).load();
		this.fillSha256(docs);
		const filteredDocs = this.getFilteredDocs(docs);
		// Record hashes only after the documents were added, so a failed add can be retried.
		await this.addDocuments(filteredDocs);
		this.updateSha256(filteredDocs);
	}
}
|
||||
|
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -1 +0,0 @@
|
||||
{"filesSha256":["bcecd6e1f4c1296ebaed37b2686a55485855bb3d04d71a16609297899ef89ba9","f67eb8d401f74305c8a4f21b2ac8986c1db3850aabe2753773ba00255fd4288c","61614a8362e20d8dc96bc8bcd52cd40b9bbf983c862253b9472f1044744bea02","f34a6fbd2e0f3fc1a1a8f3c019935e53e809d38b18fdab67ec742e02e3318a6f","e6929768ff182e0c6714a60157e94420c7942fa2cdad1f201a8302be9e86d064","b19540b04d609f5798e91d6de8ecd0fbbee9f7c3dfb008e2a198247241401c57","324d4f9a84e4849c002ae4091fe7614d2d00cb0bf4989574b307081669f9eb7a","0618796260fd0b8b4cc6daedbac59c06492a74eeebefbddf69fb13b2d2456da8","1a5d9b3ea54168ec8f89e1a93950b1d5c527c831b951070d3ffcce853c6e797f","0cc8092a90aa3f614a87258d372348706bc2f0d33e9235630849cb0aeec4342e","90ef40d14982b96bb2c4c657e17dbf74d8b15f1a36712d2759765c9e5de94995","ec62822e3eeffec14393b16a12d0dcb0920c11ad4fa1a3a6c59252ef39321c57","53f3b51bad507e0799121d30f1a96cb458d6caacda20ee6bd6447aa7a2041772","bc7551da68e709d023b76979f554252467465066285bb4c7aad07e05be82833c","a2fc83c128ebafcfc444c3740a3afcedc2e92b8c10f51724d41da57b8f670546","04c793bf24afcd608ecbbb3606ba1280602f02c678bb885b2e75f423ce394e22","3fe4c596a0bedd833fbe214081467647786a12561e3af0e4cd88a43c2787f35f","95f0df47bfd2b698f43aac7dc22ac13948fe093f838a25ab63c5f6f9b789d942","a9b8caefaa7a959928c587f74399a86549b7680894c1c882a631725a5252881e","ab68d62cb3631acf57b0704996303f2704730dd27fb30fd867092c8af90f4fb5","926688c2cc2bf5214f8ad2f82bdd68a630f0226118f50c54f8d618c2417f4c9f","4e52cee59ad6b3220e31dee7e2c9da499ce2f6c6a6a16305fefc4d05c0d63e08","c28379be7109e76bfccffcf7268b1e767c54e5f279179c773ddb33ba1a83717f","451ecba7690493bf70d26bf127bff2de09d7d5f0b51c16c363f3f372184a2988","f0b6f639135d28f12914f4fd9d8e7f11aa3c67e736cbbfbda36ec97f382b4577","a7516e047d1287385581727d5b019512288207bd2172bd241a0c6255d2b570c0","d67a47978e51dec70271dde449d9d38695a73c9787432379ab13a3b4ef78d7eb","ffbcb29742daf9184167d0e12e191e672b2d0662af402e8dfdea3f33117eb388","c19ae6410ab85759fbba1eb7c0ed49cb4f7a62759288ade5a287714e57f17823","a508c2976b25ed28733d26d2cd7601ee67aff3ad
8cc0f1faf221e2ba23f325d4"]}
|
1
deno-src/constants.ts
Normal file
1
deno-src/constants.ts
Normal file
@ -0,0 +1 @@
|
||||
/** File extensions that `indexFiles` treats as plain text and passes to `Bucket.addTextFile`. */
export const txtExts = ['.txt', '.md', '.mdx'];
|
@ -1,19 +1,19 @@
|
||||
{
|
||||
"tasks": {
|
||||
"dev": "deno run --watch -A --node-modules-dir --allow-scripts main.ts"
|
||||
},
|
||||
"imports": {
|
||||
"@kunkun/api": "jsr:@kunkun/api@^0.0.52",
|
||||
"@langchain/community": "npm:@langchain/community@^0.3.22",
|
||||
"@langchain/core": "npm:@langchain/core@^0.3.27",
|
||||
"@langchain/langgraph": "npm:@langchain/langgraph@^0.2.38",
|
||||
"@langchain/openai": "npm:@langchain/openai@^0.3.16",
|
||||
"@langchain/textsplitters": "npm:@langchain/textsplitters@^0.1.0",
|
||||
"pdf-parse": "npm:pdf-parse@^1.1.1",
|
||||
"@std/assert": "jsr:@std/assert@1",
|
||||
"valibot": "jsr:@valibot/valibot@^0.42.1",
|
||||
"faiss-node": "npm:faiss-node@^0.5.1",
|
||||
"langchain": "npm:langchain@^0.3.12"
|
||||
},
|
||||
"nodeModulesDir": "auto"
|
||||
"tasks": {
|
||||
"dev": "deno run --watch -A --node-modules-dir --allow-scripts main.ts"
|
||||
},
|
||||
"imports": {
|
||||
"@kunkun/api": "jsr:@kunkun/api@^0.0.52",
|
||||
"@langchain/community": "npm:@langchain/community@^0.3.22",
|
||||
"@langchain/core": "npm:@langchain/core@^0.3.27",
|
||||
"@langchain/langgraph": "npm:@langchain/langgraph@^0.2.38",
|
||||
"@langchain/openai": "npm:@langchain/openai@^0.3.16",
|
||||
"@langchain/textsplitters": "npm:@langchain/textsplitters@^0.1.0",
|
||||
"pdf-parse": "npm:pdf-parse@^1.1.1",
|
||||
"@std/assert": "jsr:@std/assert@1",
|
||||
"valibot": "jsr:@valibot/valibot@^0.42.1",
|
||||
"faiss-node": "npm:faiss-node@^0.5.1",
|
||||
"langchain": "npm:langchain@^0.3.12"
|
||||
},
|
||||
"nodeModulesDir": "auto"
|
||||
}
|
||||
|
@ -1,20 +1,77 @@
|
||||
import { FaissStore } from "@langchain/community/vectorstores/faiss";
|
||||
import { Bucket, embeddings, getDocsFromDirectory } from "./bucket.ts";
|
||||
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
|
||||
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
|
||||
import {
|
||||
JSONLoader,
|
||||
JSONLinesLoader,
|
||||
} from "langchain/document_loaders/fs/json";
|
||||
import { TextLoader } from "langchain/document_loaders/fs/text";
|
||||
import { OpenAIEmbeddings } from "@langchain/openai";
|
||||
// import { FaissStore } from '@langchain/community/vectorstores/faiss';
|
||||
// import { Bucket, embeddings, getDocsFromDirectory } from './bucket.ts';
|
||||
// import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||
// import { DirectoryLoader } from 'langchain/document_loaders/fs/directory';
|
||||
// import { JSONLoader, JSONLinesLoader } from 'langchain/document_loaders/fs/json';
|
||||
// import { TextLoader } from 'langchain/document_loaders/fs/text';
|
||||
// import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { existsSync } from 'node:fs';
|
||||
// import path from 'path';
|
||||
import { txtExts } from './constants.ts';
|
||||
import { Bucket } from './bucket.ts';
|
||||
import path from 'node:path';
|
||||
|
||||
const bucket = new Bucket(
|
||||
"/Users/hk/Dev/kunkun-extension-repos/kunkun-ext-rag/deno-src/buckets",
|
||||
"dev"
|
||||
);
|
||||
await bucket.init();
|
||||
// await bucket.addDirectory(
|
||||
// "/Users/hk/Dev/kunkun-docs/src/content/docs/guides/Extensions/Publish"
|
||||
/**
 * Development helper: indexes `files` into the bucket `bucketName` under `./store`.
 *
 * Each entry may be a directory, a text file (see `txtExts`) or a `.pdf` file.
 * The bucket is saved once after all entries were processed.
 *
 * @throws Error when an entry does not exist or is a file with an unsupported extension.
 */
async function indexFiles(bucketName: string, files: string[]): Promise<void> {
	const bucket = new Bucket('./store', bucketName);
	console.error('bucket path', bucket.bucketPath);
	console.error('files', files);
	await bucket.init();

	for (const file of files) {
		if (!existsSync(file)) {
			throw new Error(`File ${file} does not exist`);
		}
		console.error('file', file);

		const stats = Deno.statSync(file);
		// Directories are checked first: this branch used to sit inside the `isFile` branch,
		// where it could never run, so directories were silently skipped.
		if (stats.isDirectory) {
			console.error('Adding directory', file);
			await bucket.addDirectory(file);
			continue;
		}

		const ext = path.extname(file);
		if (txtExts.includes(ext)) {
			console.error('Adding text file', file);
			await bucket.addTextFile(file);
			console.error('Finished adding text file', file);
		} else if (ext === '.pdf') {
			console.error('Adding pdf file', file);
			await bucket.addPDF(file);
		} else {
			throw new Error(`Unsupported file type: ${ext}`);
		}
	}

	await bucket.save();
}

// Awaited so that the script does not leave a floating promise behind.
await indexFiles('Kunkun Docs', ['/Users/hk/Dev/kunkun-docs/src/content/docs/developer/DX.mdx']);
|
||||
|
||||
// const bucket = new Bucket(
|
||||
// '/Users/hk/Dev/kunkun-extension-repos/kunkun-ext-rag/extensions_support',
|
||||
// 'Kunkun Docs'
|
||||
// );
|
||||
// await bucket.addPDF("/Users/hk/Downloads/WACV_2025_Caroline_Huakun__Copy_.pdf");
|
||||
// await bucket.init();
|
||||
// const files = ['/Users/hk/Dev/kunkun-docs/src/content/docs/developer/manifest.mdx'];
|
||||
// for (const file of files) {
|
||||
// if (!existsSync(file)) {
|
||||
// throw new Error(`File ${file} does not exist`);
|
||||
// }
|
||||
// console.error('file', file);
|
||||
// // check if file is directory
|
||||
// const stats = Deno.statSync(file);
|
||||
// if (stats.isFile) {
|
||||
// const ext = path.extname(file);
|
||||
// if (txtExts.includes(ext)) {
|
||||
// console.error('Adding text file', file);
|
||||
// await bucket.addTextFile(file);
|
||||
// } else if (ext === '.pdf') {
|
||||
// console.error('Adding pdf file', file);
|
||||
// await bucket.addPDF(file);
|
||||
// } else if (stats.isDirectory) {
|
||||
// console.error('Adding directory', file);
|
||||
// await bucket.addDirectory(file);
|
||||
// } else {
|
||||
// throw new Error(`Unsupported file type: ${ext}`);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// await bucket.save();
|
||||
|
@ -1,6 +1,48 @@
|
||||
import { expose } from "@kunkun/api/runtime/deno";
|
||||
import { expose } from '@kunkun/api/runtime/deno';
|
||||
import type { DenoAPI } from '../src/api.types.ts';
|
||||
import { Bucket } from './bucket.ts';
|
||||
import { existsSync } from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { txtExts } from './constants.ts';
|
||||
|
||||
/**
 * Directory in which buckets are stored, read from the `EXTENSION_SUPPORT` environment variable.
 * Loading this module fails when the variable is missing or empty.
 */
export const extensionSupportPath = Deno.env.get('EXTENSION_SUPPORT');
if (!extensionSupportPath) {
	throw new Error('EXTENSION_SUPPORT is not set');
}
|
||||
|
||||
expose({
	/**
	 * Indexes `files` into the bucket `bucketName` stored under `extensionSupportPath`.
	 *
	 * Each entry may be a directory, a text file (see `txtExts`) or a `.pdf` file.
	 * The bucket is saved once after all entries were processed.
	 *
	 * @throws Error when an entry does not exist or is a file with an unsupported extension.
	 */
	async indexFiles(bucketName: string, files: string[]): Promise<void> {
		console.error('cwd', Deno.cwd());
		const bucket = new Bucket(extensionSupportPath, bucketName);
		console.error('bucket path', bucket.bucketPath);
		console.error('files', files);
		await bucket.init();

		for (const file of files) {
			if (!existsSync(file)) {
				throw new Error(`File ${file} does not exist`);
			}
			console.error('file', file);

			const stats = Deno.statSync(file);
			// Directories are checked first: this branch used to sit inside the `isFile` branch,
			// where it could never run, so directories were silently skipped.
			if (stats.isDirectory) {
				console.error('Adding directory', file);
				await bucket.addDirectory(file);
				continue;
			}

			const ext = path.extname(file);
			if (txtExts.includes(ext)) {
				console.error('Adding text file', file);
				await bucket.addTextFile(file);
				console.error('Finished adding text file', file);
			} else if (ext === '.pdf') {
				console.error('Adding pdf file', file);
				await bucket.addPDF(file);
			} else {
				throw new Error(`Unsupported file type: ${ext}`);
			}
		}

		await bucket.save();
	}
} satisfies DenoAPI);
|
||||
|
45
package.json
45
package.json
@ -17,7 +17,50 @@
|
||||
"demoImages": [],
|
||||
"permissions": [
|
||||
"clipboard:read-text",
|
||||
"notification:all"
|
||||
"notification:all",
|
||||
"dialog:all",
|
||||
{
|
||||
"permission": "fs:exists",
|
||||
"allow": [
|
||||
{
|
||||
"path": "$EXTENSION/deno-src/node_modules"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"permission": "shell:execute",
|
||||
"allow": [
|
||||
{
|
||||
"cmd": {
|
||||
"program": "deno",
|
||||
"args": [
|
||||
"install",
|
||||
"--allow-scripts"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"permission": "shell:deno:spawn",
|
||||
"allow": [
|
||||
{
|
||||
"path": "$EXTENSION/deno-src/index.ts",
|
||||
"read": "*",
|
||||
"write": "*",
|
||||
"ffi": "*",
|
||||
"sys": [
|
||||
"uid"
|
||||
],
|
||||
"env": "*",
|
||||
"net": [
|
||||
"api.openai.com"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"shell:stdin-write",
|
||||
"shell:kill"
|
||||
],
|
||||
"customUiCmds": [
|
||||
{
|
||||
|
13
pnpm-lock.yaml
generated
Normal file
13
pnpm-lock.yaml
generated
Normal file
@ -0,0 +1,13 @@
|
||||
lockfileVersion: '9.0'
|
||||
|
||||
settings:
|
||||
autoInstallPeers: true
|
||||
excludeLinksFromLockfile: false
|
||||
|
||||
importers:
|
||||
|
||||
.:
|
||||
dependencies:
|
||||
'@kksh/api':
|
||||
specifier: ^0.0.54
|
||||
version: link:../../kunkun/packages/api
|
3
src/api.types.ts
Normal file
3
src/api.types.ts
Normal file
@ -0,0 +1,3 @@
|
||||
/** Contract of the RPC API shared between the extension UI and the Deno process. */
export interface DenoAPI {
	/**
	 * Indexes the given paths into the named bucket.
	 *
	 * @param bucketName Name of the bucket to index into.
	 * @param files Paths to index.
	 */
	indexFiles(bucketName: string, files: string[]): Promise<void>;
}
|
@ -24,6 +24,7 @@
|
||||
>
|
||||
Delete
|
||||
</Button>
|
||||
<a href={`/database/${dbInfo.id}`}><Button>Manage Database</Button></a>
|
||||
</Card.Content>
|
||||
</Card.Root>
|
||||
{/snippet}
|
||||
|
56
src/lib/deno.ts
Normal file
56
src/lib/deno.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import { log, fs, shell, path, toast } from '@kksh/api/ui/iframe';
|
||||
import type { DenoAPI } from '../api.types';
|
||||
|
||||
/**
 * Installs the Deno dependencies if needed, spawns `$EXTENSION/deno-src/index.ts` and returns
 * the RPC handles for it.
 *
 * A failed dependency install is reported with a toast and does not abort; the spawn is still
 * attempted.
 *
 * @param env Environment variables passed to the Deno process.
 * @returns The typed API proxy together with the RPC channel, the process and the command.
 */
export async function getRpcAPI(env: { OPENAI_API_KEY: string; EXTENSION_SUPPORT: string }) {
	try {
		await installDenoDeps();
	} catch (err) {
		// A rejection is not guaranteed to be an Error instance.
		const reason = err instanceof Error ? err.message : String(err);
		await toast.error(`Failed to install deno dependencies; ${reason}`);
	}

	const cwd = await path.join(await path.extensionDir(), 'deno-src');
	console.log('cwd', cwd);

	const { rpcChannel, process, command } = await shell.createDenoRpcChannel<object, DenoAPI>(
		'$EXTENSION/deno-src/index.ts',
		[],
		{
			cwd,
			allowEnv: ['OPENAI_API_KEY', 'EXTENSION_SUPPORT', 'CWD'],
			allowAllRead: true,
			allowAllWrite: true,
			// FFI is needed to load the faiss-node native addon (faiss-node.node) from node_modules.
			allowAllFfi: true,
			allowSys: ['uid'],
			allowNet: ['api.openai.com'],
			env
		},
		{}
	);

	return {
		api: rpcChannel.getAPI(),
		rpcChannel,
		process,
		command
	};
}
|
||||
|
||||
/**
 * Runs `deno install --allow-scripts` in `<extensionDir>/deno-src` unless a `node_modules`
 * directory already exists there.
 *
 * @throws Error('Failed to install deno dependencies') with the command's stderr as `cause`
 *   when the install exits with a non-zero code.
 */
export async function installDenoDeps() {
	const cwd = await path.join(await path.extensionDir(), 'deno-src');
	const nodeModulesPath = await path.join(cwd, 'node_modules');
	// Checked once and reused for both the log line and the early return.
	const alreadyInstalled = await fs.exists(nodeModulesPath);
	console.log('nodeModulesPath', nodeModulesPath);
	console.log('await fs.exists(nodeModulesPath)', alreadyInstalled);

	if (alreadyInstalled) {
		console.log('Node modules already installed');
		return;
	}

	const command = shell.createCommand('deno', ['install', '--allow-scripts'], { cwd });
	const ret = await command.execute();
	if (ret.code !== 0) {
		console.error(`Failed to install deno dependencies; ${ret.stderr}`);
		throw new Error('Failed to install deno dependencies', { cause: ret.stderr });
	}
}
|
71
src/routes/database/[id]/+page.svelte
Normal file
71
src/routes/database/[id]/+page.svelte
Normal file
@ -0,0 +1,71 @@
|
||||
<script lang="ts">
|
||||
import { dialog, path } from '@kksh/api/ui/iframe';
|
||||
import { dbStore } from '@/stores/db';
|
||||
import { Button } from '@kksh/svelte5';
|
||||
import { getRpcAPI } from '@/deno';
|
||||
import { goto } from '$app/navigation';
|
||||
import { toast } from '@kksh/api/headless';
|
||||
let { data } = $props();

// Database selected by the `id` route parameter; when it is missing, report and go back home.
const selectedDb = $dbStore.find((db) => db.id === data.id);
if (!selectedDb) {
	toast.error('Database not found', { description: 'Name: ' + data.id });
	goto('/');
}
// Handles of the most recently started Deno RPC process, if any.
let rpc: Awaited<ReturnType<typeof getRpcAPI>> | undefined;
|
||||
|
||||
/**
 * Starts a Deno RPC process, indexes `files` into the selected database's bucket and kills
 * the process two seconds after the call finished (successfully or not).
 *
 * Failures are logged and reported with a toast; they are not rethrown.
 */
async function indexFiles(files: string[]) {
	if (!selectedDb) {
		toast.error('Database not found', { description: 'Name: ' + data.id });
		return goto('/');
	}

	// Local handle for the process started by THIS call. `rpc` is shared component state and may
	// already point at a newer process (or still at an old one) when the delayed kill below runs.
	let session: Awaited<ReturnType<typeof getRpcAPI>> | undefined;
	try {
		const extSupportDir = await path.extensionSupportDir();
		session = await getRpcAPI({
			OPENAI_API_KEY: selectedDb.apiKey,
			EXTENSION_SUPPORT: extSupportDir
		});
		rpc = session;
		session.command.stderr.on('data', (chunk) => {
			console.warn(chunk);
		});
		console.log('Start indexing files');
		await session.api.indexFiles(selectedDb.name, files);
		console.log('Finished indexing files');
	} catch (error) {
		console.error('Error indexing files', error);
		toast.error('Failed to index files');
	} finally {
		const finished = session;
		setTimeout(async () => {
			try {
				await finished?.process.kill();
			} catch (error) {
				console.error('Error killing deno process', error);
			}
		}, 2_000);
	}
}
|
||||
|
||||
/** Opens a multi-file picker and indexes the chosen files. */
async function addFiles() {
	try {
		// NOTE(review): the picker presumably resolves with null when the user cancels
		// (Tauri dialog behaviour) — confirm against the @kksh/api dialog typings.
		const res: string[] | null = await dialog.open({
			multiple: true,
			directory: false
		});
		if (!res || res.length === 0) {
			return;
		}
		await indexFiles(res);
	} catch (error) {
		console.error('Error selecting files', error);
		toast.error('Failed to select files');
	}
}
|
||||
|
||||
/** Opens a multi-directory picker and indexes the chosen directories. */
function addDirectory() {
	dialog
		.open({
			multiple: true,
			directory: true
		})
		.then(async (res: string[] | null) => {
			// NOTE(review): the picker presumably resolves with null when the user cancels
			// (Tauri dialog behaviour) — confirm against the @kksh/api dialog typings.
			if (!res || res.length === 0) {
				return;
			}
			await indexFiles(res);
		})
		.catch((error: unknown) => {
			console.error('Error selecting directories', error);
			toast.error('Failed to select directories');
		});
}
|
||||
</script>
|
||||
|
||||
<div class="container">
|
||||
<h1 class="text-2xl font-bold">Manage Database</h1>
|
||||
<Button onclick={addFiles}>Add Files</Button>
|
||||
<Button onclick={addDirectory}>Add Directory</Button>
|
||||
</div>
|
5
src/routes/database/[id]/+page.ts
Normal file
5
src/routes/database/[id]/+page.ts
Normal file
@ -0,0 +1,5 @@
|
||||
import type { PageLoad } from './$types';
|
||||
|
||||
/**
 * Exposes the `id` route parameter to the page as a number.
 * A non-numeric parameter yields `NaN`.
 */
export const load: PageLoad = ({ params: { id } }) => {
	// Explicit radix so that a parameter such as `0x1A` is not read as hexadecimal.
	return { id: Number.parseInt(id, 10) };
};
|
Loading…
x
Reference in New Issue
Block a user