Fixes #621 with improved snippets for page links

2024-03-02 14:48:02 +01:00 · 2024-03-02 14:48:02 +01:00 · 89e2e7a37c
parent 63eb99e0d3
commit 89e2e7a37c
7 changed files with 105 additions and 30 deletions
--- a/plugs/index/header.ts
+++ b/plugs/index/header.ts
@ -56,7 +56,7 @@ export async function indexHeaders({ name: pageName, tree }: IndexTreeEvent) {
    });
  }
-  console.log("Found", headers, "headers(s)");
+  // console.log("Found", headers, "headers(s)");
  await indexObjects(pageName, headers);
 }
--- a/plugs/index/page_links.ts
+++ b/plugs/index/page_links.ts
@ -6,6 +6,7 @@ import { ObjectValue } from "../../plug-api/types.ts";
 import { extractFrontmatter } from "$sb/lib/frontmatter.ts";
 import { updateITags } from "$sb/lib/tags.ts";
 import { parsePageRef } from "$sb/lib/page_ref.ts";
 import { extractSnippetAroundIndex } from "./snippet_extractor.ts";
 const pageRefRegex = /\[\[([^\]]+)\]\]/g;
@ -20,30 +21,6 @@ export type LinkObject = ObjectValue<{
  asTemplate: boolean;
 }>;
 /**
  * Extracts a short, single-line snippet of `text` surrounding position `pos`.
  *
  * The snippet is built from up to 26 characters before `pos` and up to 26
  * characters starting at `pos`; neither side crosses a line break.
  *
  * @param text the full text to take the snippet from
  * @param pos offset into `text` the snippet is centered on; values outside
  *   `[0, text.length]` are clamped into that range
  * @returns the characters before `pos` followed by the characters from `pos` on
  */
 export function extractSnippet(text: string, pos: number): string {
  // Clamp so out-of-range positions never read `undefined` characters
  // (which would otherwise be concatenated as the literal text "undefined")
  const start = Math.min(Math.max(pos, 0), text.length);
  let prefix = "";
  // Walk backwards down to AND including index 0, so the very first character
  // of the text is not dropped when the position is on the first line
  for (let i = start - 1; i >= 0; i--) {
    if (text[i] === "\n") {
      break;
    }
    prefix = text[i] + prefix;
    if (prefix.length > 25) {
      break;
    }
  }
  let suffix = "";
  // Walk forwards until the end of the line, the end of the text or the cap
  for (let i = start; i < text.length; i++) {
    if (text[i] === "\n") {
      break;
    }
    suffix += text[i];
    if (suffix.length > 25) {
      break;
    }
  }
  return prefix + suffix;
 }
 export async function indexLinks({ name, tree }: IndexTreeEvent) {
  const links: ObjectValue<LinkObject>[] = [];
  // [[Style Links]]
@ -62,7 +39,7 @@ export async function indexLinks({ name, tree }: IndexTreeEvent) {
        ref: `${name}@${pos}`,
        tag: "link",
        toPage: toPage,
-        snippet: extractSnippet(pageText, pos),
+        snippet: extractSnippetAroundIndex(pageText, pos),
        pos,
        page: name,
        asTemplate: false,
@ -97,7 +74,7 @@ export async function indexLinks({ name, tree }: IndexTreeEvent) {
            tag: "link",
            toPage: pageRefName,
            page: name,
-            snippet: extractSnippet(pageText, pos),
+            snippet: extractSnippetAroundIndex(pageText, pos),
            pos: pos,
            asTemplate: true,
          };
--- a/plugs/index/snippet_extractor.test.ts
+++ b/plugs/index/snippet_extractor.test.ts
@ -0,0 +1,20 @@
 import { assertEquals } from "$lib/test_deps.ts";
 import { extractSnippetAroundIndex } from "./snippet_extractor.ts";
 // Checks snippet extraction around a [[Diplomas]] link for two fixtures:
 // a short sentence that is returned whole (trimmed), and a long sentence
 // that is shortened on both sides and wrapped in "...".
 Deno.test("SnippetExtractor", () => {
  const linkText = "[[Diplomas]]";
  const shortSentencePage = `# Ongoing things
    This is all about [[Diplomas]], and stuff like that. More stuff.
    `;
  const longSentencePage =
    `A much much much much much much much much much much much longer sentence [[Diplomas]], that just keeps and keeps and keeps and keeps and keeps going.
  `;
  // Each entry pairs a page text with the snippet expected around the link
  const expectations: [string, string][] = [
    [
      shortSentencePage,
      "This is all about [[Diplomas]], and stuff like that.",
    ],
    [
      longSentencePage,
      "...much much much much much much much longer sentence [[Diplomas]], that just keeps and keeps and keeps and...",
    ],
  ];
  for (const [pageText, expectedSnippet] of expectations) {
    const linkPos = pageText.indexOf(linkText);
    assertEquals(extractSnippetAroundIndex(pageText, linkPos), expectedSnippet);
  }
 });
--- a/plugs/index/snippet_extractor.ts
+++ b/plugs/index/snippet_extractor.ts
@ -0,0 +1,61 @@
 /**
  * Extracts a readable snippet of `text` around the character offset `index`.
  *
  * The text is split into sentences with `Intl.Segmenter`. If the sentence
  * containing `index` is at most `maxSnippetLength` characters long, it is
  * returned trimmed. Otherwise a window of roughly `maxSnippetLength`
  * characters centered on `index` is cut out of that sentence, widened to the
  * nearest word boundaries, trimmed and wrapped in "..." on both sides.
  *
  * @param text the full text to take the snippet from
  * @param index offset into `text` the snippet should be built around
  * @param maxSnippetLength sentence length above which the sentence is
  *   shortened to a window around `index` (defaults to 100)
  * @returns the snippet, or an empty string when no sentence contains `index`
  */
 export function extractSnippetAroundIndex(
  text: string,
  index: number,
  maxSnippetLength: number = 100,
 ): string {
  // Use Intl.Segmenter to segment the text into sentences
  const sentenceSegmenter = new Intl.Segmenter("en", {
    granularity: "sentence",
  });
  const sentences = [...sentenceSegmenter.segment(text)].map((segment) =>
    segment.segment
  );
  // Find the sentence that contains the index; after the loop `currentLength`
  // is the offset in `text` at which the target sentence starts
  let currentLength = 0;
  let targetSentence = "";
  for (const sentence of sentences) {
    if (index >= currentLength && index < currentLength + sentence.length) {
      targetSentence = sentence;
      break;
    }
    currentLength += sentence.length;
  }
  // If the target sentence is within the maxSnippetLength, return it
  // (this also covers the "no sentence found" case, which yields "")
  if (targetSentence.length <= maxSnippetLength) {
    return targetSentence.trim();
  }
  const indexInSentence = index - currentLength;
  // Regex for checking if a character is a word character with unicode support
  const isWordCharacter = /[\p{L}\p{N}_]/u;
  // Keep the window offsets integral: with an odd maxSnippetLength a fractional
  // offset would index `undefined`, which RegExp.test coerces to the string
  // "undefined" and treats as word characters, so the window would silently
  // grow to span the entire sentence
  const halfWindow = Math.floor(maxSnippetLength / 2);
  // Find a reasonable word boundary to start the snippet
  let snippetStartIndex = Math.max(indexInSentence - halfWindow, 0);
  while (
    snippetStartIndex > 0 &&
    isWordCharacter.test(targetSentence[snippetStartIndex])
  ) {
    snippetStartIndex--;
  }
  snippetStartIndex = Math.max(snippetStartIndex, 0);
  // Find a reasonable word boundary to end the snippet
  let snippetEndIndex = Math.min(
    indexInSentence + halfWindow,
    targetSentence.length,
  );
  while (
    snippetEndIndex < targetSentence.length &&
    isWordCharacter.test(targetSentence[snippetEndIndex])
  ) {
    snippetEndIndex++;
  }
  snippetEndIndex = Math.min(snippetEndIndex, targetSentence.length);
  // Extract and return the refined snippet
  return "..." +
    targetSentence.substring(snippetStartIndex, snippetEndIndex).trim() + "...";
 }
--- a/plugs/index/widget.ts
+++ b/plugs/index/widget.ts
@ -66,11 +66,11 @@ export async function renderTemplateWidgets(side: "top" | "bottom"): Promise<
      rewritePageRefs(parsedMarkdown, template.ref);
      renderedTemplate = renderToText(parsedMarkdown);
      // console.log("Rendering template", template.ref, renderedTemplate);
      templateBits.push(renderedTemplate.trim());
    }
  }
  const summaryText = templateBits.join("\n");
  // console.log("Summary:", summaryText);
  return {
    markdown: summaryText,
    buttons: [
--- a/web/cm_plugins/markdown_widget.ts
+++ b/web/cm_plugins/markdown_widget.ts
@ -10,6 +10,7 @@ import { parse } from "$common/markdown_parser/parse_tree.ts";
 import { parsePageRef } from "../../plug-api/lib/page_ref.ts";
 import { extendedMarkdownLanguage } from "$common/markdown_parser/parser.ts";
 import { tagPrefix } from "../../plugs/index/constants.ts";
 import { renderToText } from "$sb/lib/tree.ts";
 const activeWidgets = new Set<MarkdownWidget>();
@ -75,6 +76,23 @@ export class MarkdownWidget extends WidgetType {
        this.client.currentPage,
      ],
    );
    const trimmedMarkdown = renderToText(mdTree).trim();
    if (!trimmedMarkdown) {
      // Net empty result after expansion
      div.innerHTML = "";
      this.client.setWidgetCache(
        this.cacheKey,
        { height: div.clientHeight, html: "" },
      );
      return;
    }
    // Parse the markdown again after trimming
    mdTree = parse(
      extendedMarkdownLanguage,
      trimmedMarkdown,
    );
    const html = renderMarkdownToHtml(mdTree, {
      // Annotate every element with its position so we can use it to put
@ -92,7 +110,6 @@ export class MarkdownWidget extends WidgetType {
      },
      preserveAttributes: true,
    });
    // console.log("Got html", html);
    if (cachedHtml === html) {
      // HTML still same as in cache, no need to re-render
--- a/website/Library/Core/Widget/Linked
+++ b/website/Library/Core/Widget/Linked
@ -7,7 +7,7 @@ hooks.bottom.where: 'true'
 {{#if @linkedMentions}}
 # Linked Mentions
 {{#each @linkedMentions}}
-* [[{{ref}}]]: `{{snippet}}`
+* [[{{ref}}]]: “{{snippet}}”
 {{/each}}
 {{/if}}
 {{/let}}