From 89e2e7a37c05827b656abd1716e5ff8ae40e4f54 Mon Sep 17 00:00:00 2001 From: Zef Hemel Date: Sat, 2 Mar 2024 14:48:02 +0100 Subject: [PATCH] Fixes #621 with improved snippets for page links --- plugs/index/header.ts | 2 +- plugs/index/page_links.ts | 29 +-------- plugs/index/snippet_extractor.test.ts | 20 ++++++ plugs/index/snippet_extractor.ts | 61 +++++++++++++++++++ plugs/index/widget.ts | 2 +- web/cm_plugins/markdown_widget.ts | 19 +++++- .../Library/Core/Widget/Linked Mentions.md | 2 +- 7 files changed, 105 insertions(+), 30 deletions(-) create mode 100644 plugs/index/snippet_extractor.test.ts create mode 100644 plugs/index/snippet_extractor.ts diff --git a/plugs/index/header.ts b/plugs/index/header.ts index 61fe46a7..aff86109 100644 --- a/plugs/index/header.ts +++ b/plugs/index/header.ts @@ -56,7 +56,7 @@ export async function indexHeaders({ name: pageName, tree }: IndexTreeEvent) { }); } - console.log("Found", headers, "headers(s)"); + // console.log("Found", headers, "headers(s)"); await indexObjects(pageName, headers); } diff --git a/plugs/index/page_links.ts b/plugs/index/page_links.ts index 407e55c9..965063f8 100644 --- a/plugs/index/page_links.ts +++ b/plugs/index/page_links.ts @@ -6,6 +6,7 @@ import { ObjectValue } from "../../plug-api/types.ts"; import { extractFrontmatter } from "$sb/lib/frontmatter.ts"; import { updateITags } from "$sb/lib/tags.ts"; import { parsePageRef } from "$sb/lib/page_ref.ts"; +import { extractSnippetAroundIndex } from "./snippet_extractor.ts"; const pageRefRegex = /\[\[([^\]]+)\]\]/g; @@ -20,30 +21,6 @@ export type LinkObject = ObjectValue<{ asTemplate: boolean; }>; -export function extractSnippet(text: string, pos: number): string { - let prefix = ""; - for (let i = pos - 1; i > 0; i--) { - if (text[i] === "\n") { - break; - } - prefix = text[i] + prefix; - if (prefix.length > 25) { - break; - } - } - let suffix = ""; - for (let i = pos; i < text.length; i++) { - if (text[i] === "\n") { - break; - } - suffix += text[i]; - if (suffix.length > 25) { - break; - } - } - return prefix + suffix; -} - export async function indexLinks({ name, tree }: IndexTreeEvent) { const links: ObjectValue[] = []; // [[Style Links]] @@ -62,7 +39,7 @@ export async function indexLinks({ name, tree }: IndexTreeEvent) { ref: `${name}@${pos}`, tag: "link", toPage: toPage, - snippet: extractSnippet(pageText, pos), + snippet: extractSnippetAroundIndex(pageText, pos), pos, page: name, asTemplate: false, @@ -97,7 +74,7 @@ export async function indexLinks({ name, tree }: IndexTreeEvent) { tag: "link", toPage: pageRefName, page: name, - snippet: extractSnippet(pageText, pos), + snippet: extractSnippetAroundIndex(pageText, pos), pos: pos, asTemplate: true, }; diff --git a/plugs/index/snippet_extractor.test.ts b/plugs/index/snippet_extractor.test.ts new file mode 100644 index 00000000..7d9a92f0 --- /dev/null +++ b/plugs/index/snippet_extractor.test.ts @@ -0,0 +1,20 @@ +import { assertEquals } from "$lib/test_deps.ts"; +import { extractSnippetAroundIndex } from "./snippet_extractor.ts"; + +Deno.test("SnippetExtractor", () => { + const testText = `# Ongoing things + This is all about [[Diplomas]], and stuff like that. More stuff. + `; + assertEquals( + extractSnippetAroundIndex(testText, testText.indexOf("[[Diplomas]]")), + "This is all about [[Diplomas]], and stuff like that.", + ); + + const testText2 = + `A much much much much much much much much much much much longer sentence [[Diplomas]], that just keeps and keeps and keeps and keeps and keeps going. + `; + assertEquals( + extractSnippetAroundIndex(testText2, testText2.indexOf("[[Diplomas]]")), + "...much much much much much much much longer sentence [[Diplomas]], that just keeps and keeps and keeps and...", + ); +}); diff --git a/plugs/index/snippet_extractor.ts b/plugs/index/snippet_extractor.ts new file mode 100644 index 00000000..4827a8f2 --- /dev/null +++ b/plugs/index/snippet_extractor.ts @@ -0,0 +1,61 @@ +export function extractSnippetAroundIndex( + text: string, + index: number, + maxSnippetLength: number = 100, +): string { + // Use Intl.Segmenter to segment the text into sentences + const sentenceSegmenter = new Intl.Segmenter("en", { + granularity: "sentence", + }); + const sentences = [...sentenceSegmenter.segment(text)].map((segment) => + segment.segment + ); + + // Find the sentence that contains the index + let currentLength = 0; + let targetSentence = ""; + for (const sentence of sentences) { + if (index >= currentLength && index < currentLength + sentence.length) { + targetSentence = sentence; + break; + } + currentLength += sentence.length; + } + + // If the target sentence is within the maxSnippetLength, return it + if (targetSentence.length <= maxSnippetLength) { + return targetSentence.trim(); + } + + const indexInSentence = index - currentLength; + + // Regex for checking if a character is a word character with unicode support + const isWordCharacter = /[\p{L}\p{N}_]/u; + + // Find a reasonable word boundary to start the snippet + let snippetStartIndex = Math.max(indexInSentence - maxSnippetLength / 2, 0); + while ( + snippetStartIndex > 0 && + isWordCharacter.test(targetSentence[snippetStartIndex]) + ) { + snippetStartIndex--; + } + snippetStartIndex = Math.max(snippetStartIndex, 0); + + // Find a reasonable word boundary to end the snippet + let snippetEndIndex = Math.min( + indexInSentence + maxSnippetLength / 2, + targetSentence.length, + ); + while ( + snippetEndIndex < targetSentence.length && + isWordCharacter.test(targetSentence[snippetEndIndex]) + ) { + snippetEndIndex++; + } + snippetEndIndex = Math.min(snippetEndIndex, targetSentence.length); + + // Extract and return the refined snippet + return "..." + + targetSentence.substring(snippetStartIndex, snippetEndIndex).trim() + "..."; +} diff --git a/plugs/index/widget.ts b/plugs/index/widget.ts index a64d8542..5353c665 100644 --- a/plugs/index/widget.ts +++ b/plugs/index/widget.ts @@ -66,11 +66,11 @@ export async function renderTemplateWidgets(side: "top" | "bottom"): Promise< rewritePageRefs(parsedMarkdown, template.ref); renderedTemplate = renderToText(parsedMarkdown); - // console.log("Rendering template", template.ref, renderedTemplate); templateBits.push(renderedTemplate.trim()); } } const summaryText = templateBits.join("\n"); + // console.log("Summary:", summaryText); return { markdown: summaryText, buttons: [ diff --git a/web/cm_plugins/markdown_widget.ts b/web/cm_plugins/markdown_widget.ts index 7ed59e57..926ccb1a 100644 --- a/web/cm_plugins/markdown_widget.ts +++ b/web/cm_plugins/markdown_widget.ts @@ -10,6 +10,7 @@ import { parse } from "$common/markdown_parser/parse_tree.ts"; import { parsePageRef } from "../../plug-api/lib/page_ref.ts"; import { extendedMarkdownLanguage } from "$common/markdown_parser/parser.ts"; import { tagPrefix } from "../../plugs/index/constants.ts"; +import { renderToText } from "$sb/lib/tree.ts"; const activeWidgets = new Set(); @@ -75,6 +76,23 @@ export class MarkdownWidget extends WidgetType { this.client.currentPage, ], ); + const trimmedMarkdown = renderToText(mdTree).trim(); + + if (!trimmedMarkdown) { + // Net empty result after expansion + div.innerHTML = ""; + this.client.setWidgetCache( + this.cacheKey, + { height: div.clientHeight, html: "" }, + ); + return; + } + + // Parse the markdown again after trimming + mdTree = parse( + extendedMarkdownLanguage, + trimmedMarkdown, + ); const html = renderMarkdownToHtml(mdTree, { // Annotate every element with its position so we can use it to put @@ -92,7 +110,6 @@ export class MarkdownWidget extends WidgetType { }, preserveAttributes: true, }); - // console.log("Got html", html); if (cachedHtml === html) { // HTML still same as in cache, no need to re-render diff --git a/website/Library/Core/Widget/Linked Mentions.md b/website/Library/Core/Widget/Linked Mentions.md index f518ae2e..8bb4e84b 100644 --- a/website/Library/Core/Widget/Linked Mentions.md +++ b/website/Library/Core/Widget/Linked Mentions.md @@ -7,7 +7,7 @@ hooks.bottom.where: 'true' {{#if @linkedMentions}} # Linked Mentions {{#each @linkedMentions}} -* [[{{ref}}]]: `{{snippet}}` +* [[{{ref}}]]: “{{snippet}}” {{/each}} {{/if}} {{/let}}