Full-text search is back

pull/513/head
Zef Hemel 2023-08-30 22:16:14 +02:00
parent 79242a524c
commit 4277de2184
3 changed files with 41 additions and 59 deletions

View File

@@ -9,6 +9,7 @@ export interface BatchKVStore {
get(keys: string[]): Promise<(any | undefined)[]>; get(keys: string[]): Promise<(any | undefined)[]>;
set(entries: Map<string, any>): Promise<void>; set(entries: Map<string, any>): Promise<void>;
delete(keys: string[]): Promise<void>; delete(keys: string[]): Promise<void>;
queryPrefix(prefix: string): Promise<[string, any][]>;
} }
type ResultObject = { type ResultObject = {
@@ -45,8 +46,8 @@ export class SimpleSearchEngine {
// Index an array of documents // Index an array of documents
public async indexDocument(document: Document): Promise<void> { public async indexDocument(document: Document): Promise<void> {
const updateIndexMap = new Map<string, string[]>(); const updateIndexMap = new Map<string, number>(); // word!id -> count
const updateReverseIndexMap = new Map<string, string[]>(); const updateReverseIndexMap = new Map<string, boolean>(); // id!word -> true
const pageContent = this.tokenize(document.text); const pageContent = this.tokenize(document.text);
const pageName = this.tokenize(document.id); const pageName = this.tokenize(document.id);
@@ -55,24 +56,15 @@ export class SimpleSearchEngine {
const stemmedWords = filteredWords.map((word) => this.stem(word)); const stemmedWords = filteredWords.map((word) => this.stem(word));
// Get the current IDs for these stemmed words // Get the current IDs for these stemmed words
const uniqueStemmedWords = [...new Set(stemmedWords)]; // const uniqueStemmedWords = [...new Set(stemmedWords)];
const currentIdsArray = await this.index.get(uniqueStemmedWords);
stemmedWords.forEach((stemmedWord) => { for (const stemmedWord of stemmedWords) {
const currentIds = const key = `${stemmedWord}!${document.id}`;
currentIdsArray[uniqueStemmedWords.indexOf(stemmedWord)] || []; const revKey = `${document.id}!${stemmedWord}`;
const currentFreq = updateIndexMap.get(key) || 0;
currentIds.push(document.id); updateIndexMap.set(key, currentFreq + 1);
updateIndexMap.set(stemmedWord, currentIds); updateReverseIndexMap.set(revKey, true);
}
if (!updateReverseIndexMap.has(document.id)) {
updateReverseIndexMap.set(document.id, []);
}
if (!updateReverseIndexMap.get(document.id)!.includes(stemmedWord)) {
updateReverseIndexMap.get(document.id)!.push(stemmedWord);
}
});
// console.log("updateIndexMap", updateIndexMap); // console.log("updateIndexMap", updateIndexMap);
@@ -86,20 +78,20 @@ export class SimpleSearchEngine {
const filteredWords = this.removeStopWords(words); const filteredWords = this.removeStopWords(words);
const stemmedWords = filteredWords.map((word) => this.stem(word)); const stemmedWords = filteredWords.map((word) => this.stem(word));
const wordIdsArray: string[][] = await this.index.get(stemmedWords); // const wordIdsArray: string[][] = await this.index.get(stemmedWords);
const matchCounts: Map<string, number> = new Map(); const matchCounts: Map<string, number> = new Map(); // pageName -> count
wordIdsArray.forEach((wordIds) => { for (const stemmedWord of stemmedWords) {
if (wordIds) { const entries = await this.index.queryPrefix(`${stemmedWord}!`);
wordIds.forEach((id) => { for (const [key, value] of entries) {
if (matchCounts.has(id)) { const id = key.split("!").slice(1).join("!");
matchCounts.set(id, matchCounts.get(id)! + 1); if (matchCounts.has(id)) {
} else { matchCounts.set(id, matchCounts.get(id)! + value);
matchCounts.set(id, 1); } else {
} matchCounts.set(id, value);
}); }
} }
}); }
const results = Array.from(matchCounts.entries()).map( const results = Array.from(matchCounts.entries()).map(
([id, score]) => ({ id, score }), ([id, score]) => ({ id, score }),
@@ -110,32 +102,18 @@ export class SimpleSearchEngine {
// Delete a document from the index // Delete a document from the index
public async deleteDocument(documentId: string): Promise<void> { public async deleteDocument(documentId: string): Promise<void> {
const words: string[][] = await this.reverseIndex.get([documentId]); const words: [string, boolean][] = await this.reverseIndex.queryPrefix(
if (words && words[0]) { `${documentId}!`,
const currentIdsArray: string[][] = await this.index.get(words[0]); );
const deleteKeys: string[] = []; const keysToDelete: string[] = [];
const updateMap = new Map<string, string[]>(); const revKeysToDelete: string[] = [];
for (const [wordKey] of words) {
words[0].forEach((word: string, i: number) => { const word = wordKey.split("!").slice(1).join("!");
const currentIds = currentIdsArray[i]; keysToDelete.push(`${word}!${documentId}`);
if (currentIds) { revKeysToDelete.push(wordKey);
const updatedIds = currentIds.filter((id) => id !== documentId);
if (updatedIds.length > 0) {
updateMap.set(word, updatedIds);
} else {
deleteKeys.push(word);
}
}
});
if (deleteKeys.length > 0) {
await this.index.delete(deleteKeys);
}
if (updateMap.size > 0) {
await this.index.set(updateMap);
}
await this.reverseIndex.delete([documentId]);
} }
await this.index.delete(keysToDelete);
await this.reverseIndex.delete(revKeysToDelete);
// console.log("Deleted", documentId, keysToDelete, revKeysToDelete);
} }
} }

View File

@@ -2,8 +2,6 @@ name: search
functions: functions:
indexPage: indexPage:
path: search.ts:indexPage path: search.ts:indexPage
# Only enable in the client
env: client
events: events:
- page:index - page:index

View File

@@ -11,6 +11,12 @@ const searchPrefix = "🔍 ";
class StoreKVStore implements BatchKVStore { class StoreKVStore implements BatchKVStore {
constructor(private prefix: string) { constructor(private prefix: string) {
} }
async queryPrefix(prefix: string): Promise<[string, any][]> {
const results = await store.queryPrefix(this.prefix + prefix);
return results.map((
{ key, value },
) => [key.substring(this.prefix.length), value]);
}
get(keys: string[]): Promise<(string[] | undefined)[]> { get(keys: string[]): Promise<(string[] | undefined)[]> {
return store.batchGet(keys.map((key) => this.prefix + key)); return store.batchGet(keys.map((key) => this.prefix + key));
} }