Fixes #621 with improved snippets for page links

2024-03-02 14:48:02 +01:00 · 2024-03-02 14:48:02 +01:00 · 89e2e7a37c
parent 63eb99e0d3
commit 89e2e7a37c
7 changed files with 105 additions and 30 deletions
--- a/plugs/index/header.ts
+++ b/plugs/index/header.ts
@ -56,7 +56,7 @@ export async function indexHeaders({ name: pageName, tree }: IndexTreeEvent) {
    });
  }

-  console.log("Found", headers, "headers(s)");
+  // console.log("Found", headers, "headers(s)");
  await indexObjects(pageName, headers);
 }

--- a/plugs/index/page_links.ts
+++ b/plugs/index/page_links.ts
@ -6,6 +6,7 @@ import { ObjectValue } from "../../plug-api/types.ts";
 import { extractFrontmatter } from "$sb/lib/frontmatter.ts";
 import { updateITags } from "$sb/lib/tags.ts";
 import { parsePageRef } from "$sb/lib/page_ref.ts";
+import { extractSnippetAroundIndex } from "./snippet_extractor.ts";

 const pageRefRegex = /\[\[([^\]]+)\]\]/g;

@ -20,30 +21,6 @@ export type LinkObject = ObjectValue<{
  asTemplate: boolean;
 }>;

-export function extractSnippet(text: string, pos: number): string {
-  let prefix = "";
-  for (let i = pos - 1; i > 0; i--) {
-    if (text[i] === "\n") {
-      break;
-    }
-    prefix = text[i] + prefix;
-    if (prefix.length > 25) {
-      break;
-    }
-  }
-  let suffix = "";
-  for (let i = pos; i < text.length; i++) {
-    if (text[i] === "\n") {
-      break;
-    }
-    suffix += text[i];
-    if (suffix.length > 25) {
-      break;
-    }
-  }
-  return prefix + suffix;
-}
-
 export async function indexLinks({ name, tree }: IndexTreeEvent) {
  const links: ObjectValue<LinkObject>[] = [];
  // [[Style Links]]
@ -62,7 +39,7 @@ export async function indexLinks({ name, tree }: IndexTreeEvent) {
        ref: `${name}@${pos}`,
        tag: "link",
        toPage: toPage,
-        snippet: extractSnippet(pageText, pos),
+        snippet: extractSnippetAroundIndex(pageText, pos),
        pos,
        page: name,
        asTemplate: false,
@ -97,7 +74,7 @@ export async function indexLinks({ name, tree }: IndexTreeEvent) {
            tag: "link",
            toPage: pageRefName,
            page: name,
-            snippet: extractSnippet(pageText, pos),
+            snippet: extractSnippetAroundIndex(pageText, pos),
            pos: pos,
            asTemplate: true,
          };
--- a/plugs/index/snippet_extractor.test.ts
+++ b/plugs/index/snippet_extractor.test.ts
@ -0,0 +1,20 @@
+import { assertEquals } from "$lib/test_deps.ts";
+import { extractSnippetAroundIndex } from "./snippet_extractor.ts";
+
+Deno.test("SnippetExtractor", () => {
+  // Case 1: the sentence holding the link is short enough to be returned
+  // whole, with the surrounding whitespace trimmed off.
+  const shortSample = `# Ongoing things
+    This is all about [[Diplomas]], and stuff like that. More stuff.
+    `;
+  const shortLinkOffset = shortSample.indexOf("[[Diplomas]]");
+  const shortSnippet = extractSnippetAroundIndex(shortSample, shortLinkOffset);
+  assertEquals(
+    shortSnippet,
+    "This is all about [[Diplomas]], and stuff like that.",
+  );
+
+  // Case 2: the sentence exceeds the default limit, so only a window around
+  // the link is kept, cut at word boundaries and wrapped in ellipses.
+  const longSample =
+    `A much much much much much much much much much much much longer sentence [[Diplomas]], that just keeps and keeps and keeps and keeps and keeps going.
+  `;
+  const longLinkOffset = longSample.indexOf("[[Diplomas]]");
+  const longSnippet = extractSnippetAroundIndex(longSample, longLinkOffset);
+  assertEquals(
+    longSnippet,
+    "...much much much much much much much longer sentence [[Diplomas]], that just keeps and keeps and keeps and...",
+  );
+});
--- a/plugs/index/snippet_extractor.ts
+++ b/plugs/index/snippet_extractor.ts
@ -0,0 +1,61 @@
+/**
+ * Extracts a short, readable snippet of `text` around the position `index`.
+ *
+ * The text is split into sentences with `Intl.Segmenter` (locale "en"). When
+ * the sentence containing `index` is at most `maxSnippetLength` UTF-16 code
+ * units long, that sentence is returned trimmed. Otherwise a window of
+ * `floor(maxSnippetLength / 2)` code units on either side of `index` is taken
+ * from the sentence, each edge is pushed outwards to the nearest non-word
+ * character so no word is cut in half, and "..." is added on every side where
+ * sentence text was actually left out.
+ *
+ * @param text The full text to take the snippet from.
+ * @param index UTF-16 offset into `text` that the snippet is centred on.
+ * @param maxSnippetLength Target snippet length. The result can be somewhat
+ *   longer because of the word-boundary widening and the ellipses.
+ * @returns The snippet, or an empty string when `index` does not address a
+ *   character of `text`.
+ */
+export function extractSnippetAroundIndex(
+  text: string,
+  index: number,
+  maxSnippetLength: number = 100,
+): string {
+  // NaN/Infinity can never address a character; handle them like any other
+  // out-of-range index.
+  if (!Number.isFinite(index)) {
+    return "";
+  }
+  const position = Math.trunc(index);
+
+  // Let the segmenter locate the sentence holding `position`; `containing`
+  // yields undefined when the position lies outside the text.
+  const sentenceSegmenter = new Intl.Segmenter("en", {
+    granularity: "sentence",
+  });
+  const target = sentenceSegmenter.segment(text).containing(position);
+  if (!target) {
+    return "";
+  }
+  const targetSentence = target.segment;
+
+  // If the target sentence is within the maxSnippetLength, return it
+  if (targetSentence.length <= maxSnippetLength) {
+    return targetSentence.trim();
+  }
+
+  const indexInSentence = position - target.index;
+
+  // Keep the window offsets integral: a fractional offset indexes `undefined`,
+  // which the regex test below would coerce to the word "undefined" and
+  // therefore treat as a word character all the way to the sentence edges.
+  const halfWindow = Math.max(Math.floor(maxSnippetLength / 2), 0);
+
+  // Regex for checking if a character is a word character with unicode support
+  const isWordCharacter = /[\p{L}\p{N}_]/u;
+
+  // Find a reasonable word boundary to start the snippet
+  let snippetStartIndex = Math.max(indexInSentence - halfWindow, 0);
+  while (
+    snippetStartIndex > 0 &&
+    isWordCharacter.test(targetSentence[snippetStartIndex])
+  ) {
+    snippetStartIndex--;
+  }
+
+  // Find a reasonable word boundary to end the snippet
+  let snippetEndIndex = Math.min(
+    indexInSentence + halfWindow,
+    targetSentence.length,
+  );
+  while (
+    snippetEndIndex < targetSentence.length &&
+    isWordCharacter.test(targetSentence[snippetEndIndex])
+  ) {
+    snippetEndIndex++;
+  }
+
+  // Only signal truncation where visible sentence text was really dropped.
+  const leadingEllipsis =
+    targetSentence.slice(0, snippetStartIndex).trim() === "" ? "" : "...";
+  const trailingEllipsis =
+    targetSentence.slice(snippetEndIndex).trim() === "" ? "" : "...";
+
+  // Extract and return the refined snippet
+  return leadingEllipsis +
+    targetSentence.substring(snippetStartIndex, snippetEndIndex).trim() +
+    trailingEllipsis;
+}
--- a/plugs/index/widget.ts
+++ b/plugs/index/widget.ts
@ -66,11 +66,11 @@ export async function renderTemplateWidgets(side: "top" | "bottom"): Promise<
      rewritePageRefs(parsedMarkdown, template.ref);
      renderedTemplate = renderToText(parsedMarkdown);

-      // console.log("Rendering template", template.ref, renderedTemplate);
      templateBits.push(renderedTemplate.trim());
    }
  }
  const summaryText = templateBits.join("\n");
+  // console.log("Summary:", summaryText);
  return {
    markdown: summaryText,
    buttons: [
--- a/web/cm_plugins/markdown_widget.ts
+++ b/web/cm_plugins/markdown_widget.ts
@ -10,6 +10,7 @@ import { parse } from "$common/markdown_parser/parse_tree.ts";
 import { parsePageRef } from "../../plug-api/lib/page_ref.ts";
 import { extendedMarkdownLanguage } from "$common/markdown_parser/parser.ts";
 import { tagPrefix } from "../../plugs/index/constants.ts";
+import { renderToText } from "$sb/lib/tree.ts";

 const activeWidgets = new Set<MarkdownWidget>();

@ -75,6 +76,23 @@ export class MarkdownWidget extends WidgetType {
        this.client.currentPage,
      ],
    );
+    const trimmedMarkdown = renderToText(mdTree).trim();
+
+    if (!trimmedMarkdown) {
+      // Net empty result after expansion
+      div.innerHTML = "";
+      this.client.setWidgetCache(
+        this.cacheKey,
+        { height: div.clientHeight, html: "" },
+      );
+      return;
+    }
+
+    // Parse the markdown again after trimming
+    mdTree = parse(
+      extendedMarkdownLanguage,
+      trimmedMarkdown,
+    );

    const html = renderMarkdownToHtml(mdTree, {
      // Annotate every element with its position so we can use it to put
@ -92,7 +110,6 @@ export class MarkdownWidget extends WidgetType {
      },
      preserveAttributes: true,
    });
-    // console.log("Got html", html);

    if (cachedHtml === html) {
      // HTML still same as in cache, no need to re-render
--- a/website/Library/Core/Widget/Linked
+++ b/website/Library/Core/Widget/Linked
@ -7,7 +7,7 @@ hooks.bottom.where: 'true'
 {{#if @linkedMentions}}
 # Linked Mentions
 {{#each @linkedMentions}}
-* [[{{ref}}]]: `{{snippet}}`
+* [[{{ref}}]]: “{{snippet}}”
 {{/each}}
 {{/if}}
 {{/let}}