From 010e2b23f4ada77ec57ea770fb142b7c1af84a9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20S=2E=20=C5=81ukasiewicz?= Date: Sun, 20 Oct 2024 12:39:58 +0200 Subject: [PATCH] Allow tags to start with a number and be escaped with angle brackets (#1116) Includes tests and Markdown/Hashtags page for website. Closes #1072 --- common/markdown_parser/constants.ts | 2 +- common/markdown_parser/parser.test.ts | 54 +++++++++++++++++++++++++++ plug-api/lib/frontmatter.ts | 3 +- plug-api/lib/tags.ts | 29 +++++++++++++- plugs/editor/navigate.ts | 3 +- plugs/index/header.ts | 3 +- plugs/index/paragraph.ts | 3 +- plugs/index/table.ts | 3 +- plugs/index/tag_page.ts | 3 +- plugs/index/tags.ts | 17 +++++---- website/Markdown/Extensions.md | 2 +- website/Markdown/Hashtags.md | 14 +++++++ 12 files changed, 120 insertions(+), 16 deletions(-) create mode 100644 website/Markdown/Hashtags.md diff --git a/common/markdown_parser/constants.ts b/common/markdown_parser/constants.ts index f976b23e..7f0255cd 100644 --- a/common/markdown_parser/constants.ts +++ b/common/markdown_parser/constants.ts @@ -1,5 +1,5 @@ export const wikiLinkRegex = /(!?\[\[)([^\]\|]+)(?:\|([^\]]+))?(\]\])/g; // [fullMatch, firstMark, url, alias, lastMark] export const mdLinkRegex = /!?\[(?[^\]]*)\]\((?<url>.+)\)/g; // [fullMatch, alias, url] export const tagRegex = - /#[^\d\s!@#$%^&*(),.?":{}|<>\\][^\s!@#$%^&*(),.?":{}|<>\\]*/; + /#(?:(?:\d*[^\d\s!@#$%^&*(),.?":{}|<>\\][^\s!@#$%^&*(),.?":{}|<>\\]*)|(?:<[^>\n]+>))/; export const pWikiLinkRegex = new RegExp("^" + wikiLinkRegex.source); // Modified regex used only in parser diff --git a/common/markdown_parser/parser.test.ts b/common/markdown_parser/parser.test.ts index 3709242a..f763460e 100644 --- a/common/markdown_parser/parser.test.ts +++ b/common/markdown_parser/parser.test.ts @@ -5,6 +5,7 @@ import { } from "@silverbulletmd/silverbullet/lib/tree"; import { assert, assertEquals, assertNotEquals } from "@std/assert"; import { parseMarkdown } from "./parser.ts"; +import { extractHashtag, renderHashtag } from "../../plug-api/lib/tags.ts"; const sample1 = `--- type: page @@ -162,3 +163,56 @@ Deno.test("Test lua directive parser", () => { const simpleExample = `Simple \${{a=}}`; console.log(JSON.stringify(parseMarkdown(simpleExample), null, 2)); }); + +const hashtagSample = ` +Hashtags, e.g. #mytag but ignore in code \`#mytag\`. +They can contain slashes like #level/beginner, single quotes, and dashes: #Mike's-idea. +Can be just #a single letter. +But no other #interpunction: #exclamation! #question? +There is a way to write #<tag with spaces> +These cannot span #<multiple +lines> +#no#spacing also works. +Hashtags can start with number if there's something after it: #3dprint #15-52_Trip-to-NYC. +But magazine issue #1 or #123 are not hashtags. +Should support other languages, like #żółć or #井号 +`; + +Deno.test("Test hashtag parser", () => { + const tree = parseMarkdown(hashtagSample); + const hashtags = collectNodesOfType(tree, "Hashtag"); + assertEquals(hashtags.length, 14); + + assertEquals(hashtags[0].children![0].text, "#mytag"); + assertEquals(hashtags[1].children![0].text, "#level/beginner"); + assertEquals(hashtags[2].children![0].text, "#Mike's-idea"); + assertEquals(hashtags[3].children![0].text, "#a"); + assertEquals(hashtags[4].children![0].text, "#interpunction"); + assertEquals(hashtags[5].children![0].text, "#exclamation"); + assertEquals(hashtags[6].children![0].text, "#question"); + assertEquals(hashtags[7].children![0].text, "#<tag with spaces>"); + // multiple lines not allowed + assertEquals(hashtags[8].children![0].text, "#no"); + assertEquals(hashtags[9].children![0].text, "#spacing"); + assertEquals(hashtags[10].children![0].text, "#3dprint"); + assertEquals(hashtags[11].children![0].text, "#15-52_Trip-to-NYC"); + assertEquals(hashtags[12].children![0].text, "#żółć"); + assertEquals(hashtags[13].children![0].text, "#井号"); +}); + +Deno.test("Test hashtag helper functions", () => { + assertEquals(extractHashtag("#name"), "name"); + assertEquals(extractHashtag("#123-content"), "123-content"); + assertEquals(extractHashtag("#<escaped tag>"), "escaped tag"); + assertEquals( + extractHashtag("#<allow < and # inside>"), + "allow < and # inside", + ); + + assertEquals(renderHashtag("simple"), "#simple"); + assertEquals(renderHashtag("123-content"), "#123-content"); + assertEquals(renderHashtag("with spaces"), "#<with spaces>"); + assertEquals(renderHashtag("single'quote"), "#single'quote"); + // should behave like this for all characters in tagRegex + assertEquals(renderHashtag("exclamation!"), "#<exclamation!>"); +}); diff --git a/plug-api/lib/frontmatter.ts b/plug-api/lib/frontmatter.ts index e571d798..d90591c0 100644 --- a/plug-api/lib/frontmatter.ts +++ b/plug-api/lib/frontmatter.ts @@ -7,6 +7,7 @@ import { } from "./tree.ts"; import { cleanupJSON } from "./json.ts"; import { YAML } from "../syscalls.ts"; +import { extractHashtag } from "./tags.ts"; export type FrontMatter = { tags?: string[] } & Record<string, any>; @@ -48,7 +49,7 @@ export async function extractFrontmatter( break; } } else if (child.type === "Hashtag") { - const tagname = child.children![0].text!.substring(1); + const tagname = extractHashtag(child.children![0].text!); collectedTags.add(tagname); if ( diff --git a/plug-api/lib/tags.ts b/plug-api/lib/tags.ts index 1af39bf3..6352e005 100644 --- a/plug-api/lib/tags.ts +++ b/plug-api/lib/tags.ts @@ -4,6 +4,7 @@ import { type ParseTree, traverseTree, } from "@silverbulletmd/silverbullet/lib/tree"; +import { tagRegex } from "$common/markdown_parser/constants.ts"; export function updateITags<T>(obj: ObjectValue<T>, frontmatter: FrontMatter) { const itags = [obj.tag, ...frontmatter.tags || []]; @@ -26,7 +27,7 @@ export function extractHashTags(n: ParseTree): string[] { const tags = new Set<string>(); traverseTree(n, (n) => { if (n.type === "Hashtag") { - tags.add(n.children![0].text!.substring(1)); + tags.add(extractHashtag(n.children![0].text!)); return true; } else if (n.type === "OrderedList" || n.type === "BulletList") { // Don't traverse into sub-lists @@ -37,6 +38,32 @@ export function extractHashTags(n: ParseTree): string[] { return [...tags]; } +/** Extract the name from hashtag text, removing # prefix and <angle brackets> if necessary */ +export function extractHashtag(text: string): string { + if (text[0] !== "#") { // you shouldn't call this function at all + console.error("extractHashtag called on already clean string", text); + return text; + } else if (text[1] === "<") { + if (text.slice(-1) !== ">") { // this is malformed: #<name but maybe we're trying to autocomplete + return text.slice(2); + } else { // this is correct #<name> + return text.slice(2, -1); + } + } else { // this is just #name + return text.slice(1); + } +} + +/** Get markup for a hashtag name with # prefix and angle brackets if necessary */ +export function renderHashtag(name: string): string { + // detect with the same regex as the parser + const simple: string = "#" + name; + const match = simple.match(tagRegex); + if (!match || match[0].length !== simple.length) { + return `#<${name}>`; + } else return simple; +} + /** * Cleans hashtags from a tree as a side effect * @param n diff --git a/plugs/editor/navigate.ts b/plugs/editor/navigate.ts index 7731d013..5b986eb1 100644 --- a/plugs/editor/navigate.ts +++ b/plugs/editor/navigate.ts @@ -1,4 +1,5 @@ import type { ClickEvent } from "../../plug-api/types.ts"; +import { extractHashtag } from "../../plug-api/lib/tags.ts"; import { editor, markdown, @@ -123,7 +124,7 @@ async function actionClickOrActionEnter( break; } case "Hashtag": { - const hashtag = mdTree.children![0].text!.slice(1); + const hashtag = extractHashtag(mdTree.children![0].text!); await editor.navigate( { page: `${tagPrefix}${hashtag}`, pos: 0 }, false, diff --git a/plugs/index/header.ts b/plugs/index/header.ts index 5fb62a5c..4151ebaa 100644 --- a/plugs/index/header.ts +++ b/plugs/index/header.ts @@ -11,6 +11,7 @@ import type { import { indexObjects, queryObjects } from "./api.ts"; import { parsePageRef } from "@silverbulletmd/silverbullet/lib/page_ref"; import { extractAttributes } from "@silverbulletmd/silverbullet/lib/attribute"; +import { extractHashtag } from "../../plug-api/lib/tags.ts"; type HeaderObject = ObjectValue< { @@ -35,7 +36,7 @@ export async function indexHeaders({ name: pageName, tree }: IndexTreeEvent) { collectNodesOfType(n, "Hashtag").forEach((h) => { // Push tag to the list, removing the initial # - tags.add(h.children![0].text!.substring(1)); + tags.add(extractHashtag(h.children![0].text!)); h.children = []; }); diff --git a/plugs/index/paragraph.ts b/plugs/index/paragraph.ts index b1945aed..c73f3371 100644 --- a/plugs/index/paragraph.ts +++ b/plugs/index/paragraph.ts @@ -10,6 +10,7 @@ import { extractAttributes } from "@silverbulletmd/silverbullet/lib/attribute"; import type { ObjectValue } from "../../plug-api/types.ts"; import { updateITags } from "@silverbulletmd/silverbullet/lib/tags"; import { extractFrontmatter } from "@silverbulletmd/silverbullet/lib/frontmatter"; +import { extractHashtag } from "../../plug-api/lib/tags.ts"; /** ParagraphObject An index object for the top level text nodes */ export type ParagraphObject = ObjectValue< @@ -40,7 +41,7 @@ export async function indexParagraphs({ name: page, tree }: IndexTreeEvent) { // Collect tags and remove from the tree const tags = new Set<string>(); collectNodesOfType(p, "Hashtag").forEach((tagNode) => { - tags.add(tagNode.children![0].text!.substring(1)); + tags.add(extractHashtag(tagNode.children![0].text!)); // Hacky way to remove the hashtag tagNode.children = []; }); diff --git a/plugs/index/table.ts b/plugs/index/table.ts index e680f5c9..c1fbae19 100644 --- a/plugs/index/table.ts +++ b/plugs/index/table.ts @@ -1,4 +1,5 @@ import type { IndexTreeEvent, ObjectValue } from "../../plug-api/types.ts"; +import { extractHashtag } from "../../plug-api/lib/tags.ts"; import { collectNodesMatching, collectNodesOfType, @@ -51,7 +52,7 @@ export async function indexTables({ name: pageName, tree }: IndexTreeEvent) { const tags = new Set<string>(); collectNodesOfType(row, "Hashtag").forEach((h) => { // Push tag to the list, removing the initial # - tags.add(h.children![0].text!.substring(1)); + tags.add(extractHashtag(h.children![0].text!)); }); const cells = collectNodesOfType(row, "TableCell"); diff --git a/plugs/index/tag_page.ts b/plugs/index/tag_page.ts index c459a7ea..b8fa4313 100644 --- a/plugs/index/tag_page.ts +++ b/plugs/index/tag_page.ts @@ -1,4 +1,5 @@ import type { FileMeta } from "../../plug-api/types.ts"; +import { renderHashtag } from "../../plug-api/lib/tags.ts"; import { markdown, system } from "@silverbulletmd/silverbullet/syscalls"; import { renderToText } from "@silverbulletmd/silverbullet/lib/tree"; import { tagPrefix } from "./constants.ts"; @@ -10,7 +11,7 @@ export async function readFileTag( tagPrefix.length, name.length - ".md".length, ); - const text = `All objects in your space tagged with #${tagName}: + const text = `All objects in your space tagged with ${renderHashtag(tagName)}: \`\`\`template template: | {{#if .}} diff --git a/plugs/index/tags.ts b/plugs/index/tags.ts index 3f4634ef..1500e1d8 100644 --- a/plugs/index/tags.ts +++ b/plugs/index/tags.ts @@ -1,4 +1,8 @@ -import type { CompleteEvent, IndexTreeEvent } from "../../plug-api/types.ts"; +import type { + CompleteEvent, + IndexTreeEvent, + ObjectValue, +} from "../../plug-api/types.ts"; import { extractFrontmatter } from "@silverbulletmd/silverbullet/lib/frontmatter"; import { indexObjects, queryObjects } from "./api.ts"; import { @@ -6,7 +10,7 @@ import { collectNodesOfType, findParentMatching, } from "@silverbulletmd/silverbullet/lib/tree"; -import type { ObjectValue } from "../../plug-api/types.ts"; +import { extractHashtag, renderHashtag } from "../../plug-api/lib/tags.ts"; export type TagObject = ObjectValue<{ name: string; @@ -22,7 +26,7 @@ export async function indexTags({ name, tree }: IndexTreeEvent) { tags.add(`${pageTag}:page`); } collectNodesOfType(tree, "Hashtag").forEach((h) => { - const tagName = h.children![0].text!.substring(1); + const tagName = extractHashtag(h.children![0].text!); // Check if this occurs in the context of a task if (findParentMatching(h, (n) => n.type === "Task")) { tags.add(`${tagName}:task`); @@ -58,11 +62,10 @@ export async function tagComplete(completeEvent: CompleteEvent) { return null; } - const match = /#[^#\d\s\[\]]+\w*$/.exec(completeEvent.linePrefix); + const match = /#[^#\s\[\]]+\w*$/.exec(completeEvent.linePrefix); if (!match) { return null; } - const tagPrefix = match[0].substring(1); // Query all tags with a matching parent const allTags: any[] = await queryObjects<TagObject>("tag", { @@ -71,9 +74,9 @@ export async function tagComplete(completeEvent: CompleteEvent) { }, 5); return { - from: completeEvent.pos - tagPrefix.length, + from: completeEvent.pos - match[0].length, options: allTags.map((tag) => ({ - label: tag.name, + label: renderHashtag(tag.name), type: "tag", })), }; diff --git a/website/Markdown/Extensions.md b/website/Markdown/Extensions.md index b030c612..91441a29 100644 --- a/website/Markdown/Extensions.md +++ b/website/Markdown/Extensions.md @@ -10,7 +10,7 @@ In addition to supporting [[Markdown/Basics|markdown basics]] as standardized by * [[Transclusions]] syntax * [[Markdown/Anchors]] * [[Markdown/Admonitions]] -* Hashtags, e.g. `#mytag`. +* [[Markdown/Hashtags]] * [[Markdown/Command links]] syntax * [Tables](https://www.markdownguide.org/extended-syntax/#tables) * [Task lists](https://www.markdownguide.org/extended-syntax/#task-lists) diff --git a/website/Markdown/Hashtags.md b/website/Markdown/Hashtags.md new file mode 100644 index 00000000..5d03e6a9 --- /dev/null +++ b/website/Markdown/Hashtags.md @@ -0,0 +1,14 @@ +#level/beginner + +These can be used in text to assign an [[Objects#tag]]. If hashtags are the only content of first paragraph, they are applied to the entire page. + +Hashtags can contain letters, dashes, underscores and other characters, but not: +- Whitespace (space, newline etc.) +- Characters from this list: `!@#$%^&*(),.?":{}|<>\` +- Consist of digits only like #123 + +If you need your tags to contain these characters, you have to surround the tag content with angle brackets like this: #<my tag> + +```query +tag where page = @page.name +``` \ No newline at end of file