Index plug: index paragraph nodes (#528)

* index plug: index paragraph nodes

Includes attributes and tags in the paragraphs.
Only considers top-level paragraphs (those directly below the Document node).

* keep ref and pos consistent with other objects

- most objects just store the starting position in the `pos` field.
- ref is usually `${page}@${pos}` so that it works as a link.

* cleanup and clarify comments

* add paragraph to builtins index
pull/532/head
Ian Shehadeh 2023-10-12 14:30:47 -04:00 committed by GitHub
parent c2610cba7e
commit 50105fd044
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 0 deletions

View File

@ -53,6 +53,11 @@ export const builtins: Record<string, Record<string, string>> = {
inDirective: "boolean",
asTemplate: "boolean",
},
paragraph: {
text: "string",
page: "string",
pos: "number",
},
};
export async function loadBuiltinsIntoIndex() {

View File

@ -86,6 +86,11 @@ functions:
events:
- page:index
indexParagraphs:
path: "./paragraph.ts:indexParagraphs"
events:
- page:index
# Backlinks
indexLinks:
path: "./page_links.ts:indexLinks"

51
plugs/index/paragraph.ts Normal file
View File

@ -0,0 +1,51 @@
import type { IndexTreeEvent } from "$sb/app_event.ts";
import { indexObjects } from "./api.ts";
import { renderToText, traverseTree, traverseTreeAsync } from "$sb/lib/tree.ts";
import { extractAttributes } from "$sb/lib/attribute.ts";
/**
 * Index object describing one top-level paragraph of a page.
 *
 * The intersection with `Record<string, any>` leaves room for arbitrary extra
 * keys; `indexParagraphs` uses this to merge the attributes it extracts from
 * the paragraph into the object.
 */
export type ParagraphObject = {
/** Link-style reference, built by `indexParagraphs` as `${page}@${pos}`. */
ref: string;
/** Always contains "paragraph", plus one entry per hashtag found in the paragraph (first character dropped). */
tags: string[];
/** The paragraph rendered back to text via `renderToText`. */
text: string;
/** Name of the page being indexed. */
page: string;
/** Start position of the paragraph node (its `from` offset). */
pos: number;
} & Record<string, any>;
/**
 * Indexes the paragraphs that sit directly beneath the Document node of a page.
 *
 * Every such paragraph is turned into a `ParagraphObject` that carries the
 * "paragraph" tag, any hashtags found inside the paragraph, and the attributes
 * extracted from it. The collected objects are passed to `indexObjects` in a
 * single call once the traversal has finished.
 *
 * The destructured event supplies `name` (used as the page name) and `tree`
 * (the parse tree that is traversed).
 */
export async function indexParagraphs({ name: page, tree }: IndexTreeEvent) {
  const paragraphs: ParagraphObject[] = [];

  await traverseTreeAsync(tree, async (node) => {
    if (node.type !== "Paragraph") {
      // Only the Document node yields `false` (keep descending). Any other
      // non-paragraph node yields `true`, which ends traversal of that subtree,
      // so paragraphs nested inside block elements are never visited.
      return node.type !== "Document";
    }

    // Every indexed paragraph carries the "paragraph" tag; hashtags are added on top.
    const tagSet = new Set<string>(["paragraph"]);
    traverseTree(node, (child) => {
      const isHashtag = child.type === "Hashtag";
      if (isHashtag) {
        // Drop the first character of the hashtag text (presumably the leading "#").
        tagSet.add(child.children![0].text!.slice(1));
      }
      // Stop below a hashtag, keep descending through anything else.
      return isHashtag;
    });

    const attributes = await extractAttributes(node, false);
    const start = node.from!;

    // `attributes` is spread last, so an attribute whose key matches one of the
    // fixed fields replaces that field's value.
    const entry: ParagraphObject = {
      ref: `${page}@${start}`,
      text: renderToText(node),
      tags: Array.from(tagSet),
      page,
      pos: start,
      ...attributes,
    };
    paragraphs.push(entry);

    // A paragraph's own subtree is never traversed further.
    return true;
  });

  await indexObjects<ParagraphObject>(page, paragraphs);
}