From 50105fd044f4488902404dded9fa23e0b143933c Mon Sep 17 00:00:00 2001
From: Ian Shehadeh
Date: Thu, 12 Oct 2023 14:30:47 -0400
Subject: [PATCH] Index plug: index paragraph nodes (#528)

* index plug: index paragraph nodes

Includes attributes and tags in the paragraphs.
Only considers top level paragraphs (right below Document node)

* keep ref and pos consistent with other objects

- most objects just store the starting position in the `pos` field.
- ref is usually `${page}@${pos}` so that it works as a link.

* cleanup and clarify comments

* add paragraph to builtins index
---
Review notes (free-form text after the three-dash line; not part of the
commit message):

- What the patch does: registers a `paragraph` entry (text, page, pos) in
  the builtins table, hooks `indexParagraphs` to the `page:index` event, and
  adds plugs/index/paragraph.ts.
- `indexParagraphs` walks the tree with `traverseTreeAsync`. Its callback
  returns false only for the `Document` node and true for every other node;
  the in-code comments describe these as "continue traversal" and "stop", so
  only paragraphs found directly under `Document` are indexed.
- Each indexed paragraph is tagged "paragraph" plus the text of every
  `Hashtag` node inside it with its first character dropped (presumably the
  leading '#' -- confirm against the parser).
- `pos` is the paragraph node's `from` offset and `ref` is `${page}@${pos}`.
- NOTE(review): this copy of the patch had lost its line breaks, indentation
  and two sets of type arguments. The layout below is reconstructed, and
  `Record<string, Record<string, string>>` (hunk context for `builtins`) and
  `Record<string, any>` (on `ParagraphObject`) are restored from the visible
  field shapes -- verify against the upstream commit before relying on the
  recorded blob hashes.
- NOTE(review): the author address is missing from the `From:` header in
  this copy; `git am` may require it to be restored.

 plugs/index/builtins.ts     |  5 ++++
 plugs/index/index.plug.yaml |  5 ++++
 plugs/index/paragraph.ts    | 51 +++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+)
 create mode 100644 plugs/index/paragraph.ts

diff --git a/plugs/index/builtins.ts b/plugs/index/builtins.ts
index 49fe15c0..7eaa7507 100644
--- a/plugs/index/builtins.ts
+++ b/plugs/index/builtins.ts
@@ -53,6 +53,11 @@ export const builtins: Record<string, Record<string, string>> = {
     inDirective: "boolean",
     asTemplate: "boolean",
   },
+  paragraph: {
+    text: "string",
+    page: "string",
+    pos: "number",
+  },
 };
 
 export async function loadBuiltinsIntoIndex() {
diff --git a/plugs/index/index.plug.yaml b/plugs/index/index.plug.yaml
index 488da23a..ff5f1f32 100644
--- a/plugs/index/index.plug.yaml
+++ b/plugs/index/index.plug.yaml
@@ -86,6 +86,11 @@ functions:
     events:
       - page:index
 
+  indexParagraphs:
+    path: "./paragraph.ts:indexParagraphs"
+    events:
+      - page:index
+
   # Backlinks
   indexLinks:
     path: "./page_links.ts:indexLinks"
diff --git a/plugs/index/paragraph.ts b/plugs/index/paragraph.ts
new file mode 100644
index 00000000..9619e681
--- /dev/null
+++ b/plugs/index/paragraph.ts
@@ -0,0 +1,51 @@
+import type { IndexTreeEvent } from "$sb/app_event.ts";
+import { indexObjects } from "./api.ts";
+import { renderToText, traverseTree, traverseTreeAsync } from "$sb/lib/tree.ts";
+import { extractAttributes } from "$sb/lib/attribute.ts";
+
+/** ParagraphObject An index object for the top level text nodes */
+export type ParagraphObject = {
+  ref: string;
+  tags: string[];
+  text: string;
+  page: string;
+  pos: number;
+} & Record<string, any>;
+
+export async function indexParagraphs({ name: page, tree }: IndexTreeEvent) {
+  const objects: ParagraphObject[] = [];
+
+  await traverseTreeAsync(tree, async (p) => {
+    // only search directly under document
+    // Paragraph nodes also appear under block elements
+    if (p.type == "Document") return false; // continue traversal if p is Document
+    if (p.type != "Paragraph") return true;
+
+    const tags = new Set(["paragraph"]);
+    // tag the paragraph with any hashtags inside it
+    traverseTree(p, (e) => {
+      if (e.type == "Hashtag") {
+        tags.add(e.children![0].text!.substring(1));
+        return true;
+      }
+
+      return false;
+    });
+
+    const attrs = await extractAttributes(p, false);
+    const pos = p.from!;
+    objects.push({
+      ref: `${page}@${pos}`,
+      text: renderToText(p),
+      tags: [...tags.values()],
+      page,
+      pos,
+      ...attrs,
+    });
+
+    // stop on every element except document, including paragraphs
+    return true;
+  });
+
+  await indexObjects(page, objects);
+}