diff --git a/plugs/search/engine.ts b/plugs/search/engine.ts index 6840e9a6..a6b16e05 100644 --- a/plugs/search/engine.ts +++ b/plugs/search/engine.ts @@ -9,6 +9,7 @@ export interface BatchKVStore { get(keys: string[]): Promise<(any | undefined)[]>; set(entries: Map): Promise; delete(keys: string[]): Promise; + queryPrefix(prefix: string): Promise<[string, any][]>; } type ResultObject = { @@ -45,8 +46,8 @@ export class SimpleSearchEngine { // Index an array of documents public async indexDocument(document: Document): Promise { - const updateIndexMap = new Map(); - const updateReverseIndexMap = new Map(); + const updateIndexMap = new Map(); // word!id -> count + const updateReverseIndexMap = new Map(); // id!word -> true const pageContent = this.tokenize(document.text); const pageName = this.tokenize(document.id); @@ -55,24 +56,15 @@ export class SimpleSearchEngine { const stemmedWords = filteredWords.map((word) => this.stem(word)); // Get the current IDs for these stemmed words - const uniqueStemmedWords = [...new Set(stemmedWords)]; - const currentIdsArray = await this.index.get(uniqueStemmedWords); + // const uniqueStemmedWords = [...new Set(stemmedWords)]; - stemmedWords.forEach((stemmedWord) => { - const currentIds = - currentIdsArray[uniqueStemmedWords.indexOf(stemmedWord)] || []; - - currentIds.push(document.id); - updateIndexMap.set(stemmedWord, currentIds); - - if (!updateReverseIndexMap.has(document.id)) { - updateReverseIndexMap.set(document.id, []); - } - - if (!updateReverseIndexMap.get(document.id)!.includes(stemmedWord)) { - updateReverseIndexMap.get(document.id)!.push(stemmedWord); - } - }); + for (const stemmedWord of stemmedWords) { + const key = `${stemmedWord}!${document.id}`; + const revKey = `${document.id}!${stemmedWord}`; + const currentFreq = updateIndexMap.get(key) || 0; + updateIndexMap.set(key, currentFreq + 1); + updateReverseIndexMap.set(revKey, true); + } // console.log("updateIndexMap", updateIndexMap); @@ -86,20 +78,20 @@ export class SimpleSearchEngine { const filteredWords = this.removeStopWords(words); const stemmedWords = filteredWords.map((word) => this.stem(word)); - const wordIdsArray: string[][] = await this.index.get(stemmedWords); - const matchCounts: Map = new Map(); + // const wordIdsArray: string[][] = await this.index.get(stemmedWords); + const matchCounts: Map = new Map(); // pageName -> count - wordIdsArray.forEach((wordIds) => { - if (wordIds) { - wordIds.forEach((id) => { - if (matchCounts.has(id)) { - matchCounts.set(id, matchCounts.get(id)! + 1); - } else { - matchCounts.set(id, 1); - } - }); + for (const stemmedWord of stemmedWords) { + const entries = await this.index.queryPrefix(`${stemmedWord}!`); + for (const [key, value] of entries) { + const id = key.split("!").slice(1).join("!"); + if (matchCounts.has(id)) { + matchCounts.set(id, matchCounts.get(id)! + value); + } else { + matchCounts.set(id, value); + } } - }); + } const results = Array.from(matchCounts.entries()).map( ([id, score]) => ({ id, score }), @@ -110,32 +102,18 @@ export class SimpleSearchEngine { // Delete a document from the index public async deleteDocument(documentId: string): Promise { - const words: string[][] = await this.reverseIndex.get([documentId]); - if (words && words[0]) { - const currentIdsArray: string[][] = await this.index.get(words[0]); - const deleteKeys: string[] = []; - const updateMap = new Map(); - - words[0].forEach((word: string, i: number) => { - const currentIds = currentIdsArray[i]; - if (currentIds) { - const updatedIds = currentIds.filter((id) => id !== documentId); - if (updatedIds.length > 0) { - updateMap.set(word, updatedIds); - } else { - deleteKeys.push(word); - } - } - }); - - if (deleteKeys.length > 0) { - await this.index.delete(deleteKeys); - } - if (updateMap.size > 0) { - await this.index.set(updateMap); - } - - await this.reverseIndex.delete([documentId]); + const words: [string, boolean][] = await this.reverseIndex.queryPrefix( + `${documentId}!`, + ); + const keysToDelete: string[] = []; + const revKeysToDelete: string[] = []; + for (const [wordKey] of words) { + const word = wordKey.split("!").slice(1).join("!"); + keysToDelete.push(`${word}!${documentId}`); + revKeysToDelete.push(wordKey); } + await this.index.delete(keysToDelete); + await this.reverseIndex.delete(revKeysToDelete); + // console.log("Deleted", documentId, keysToDelete, revKeysToDelete); } } diff --git a/plugs/search/search.plug.yaml b/plugs/search/search.plug.yaml index a5b7701c..50fd59dd 100644 --- a/plugs/search/search.plug.yaml +++ b/plugs/search/search.plug.yaml @@ -2,8 +2,6 @@ name: search functions: indexPage: path: search.ts:indexPage - # Only enable in the client - env: client events: - page:index diff --git a/plugs/search/search.ts b/plugs/search/search.ts index a3880337..b604666d 100644 --- a/plugs/search/search.ts +++ b/plugs/search/search.ts @@ -11,6 +11,12 @@ const searchPrefix = "🔍 "; class StoreKVStore implements BatchKVStore { constructor(private prefix: string) { } + async queryPrefix(prefix: string): Promise<[string, any][]> { + const results = await store.queryPrefix(this.prefix + prefix); + return results.map(( + { key, value }, + ) => [key.substring(this.prefix.length), value]); + } get(keys: string[]): Promise<(string[] | undefined)[]> { return store.batchGet(keys.map((key) => this.prefix + key)); }