silverbullet/common/markdown_parser/parser.ts

742 lines
21 KiB
TypeScript
Raw Permalink Normal View History

2024-01-25 18:42:36 +08:00
import { commandLinkRegex } from "../command.ts";
import { yaml as yamlLanguage } from "@codemirror/legacy-modes/mode/yaml?external=@codemirror/language&target=es2022";
2024-07-30 23:33:33 +08:00
import { styleTags, type Tag, tags as t } from "@lezer/highlight";
2022-04-25 16:33:38 +08:00
import {
2024-07-30 23:33:33 +08:00
type BlockContext,
type LeafBlock,
type LeafBlockParser,
type Line,
type MarkdownConfig,
2022-12-05 20:05:48 +08:00
Strikethrough,
Subscript,
Superscript,
} from "@lezer/markdown";
import { markdown } from "@codemirror/lang-markdown";
import { StreamLanguage } from "@codemirror/language";
import * as ct from "./customtags.ts";
2024-01-24 20:34:12 +08:00
import { NakedURLTag } from "./customtags.ts";
import { TaskList } from "./extended_task.ts";
/**
 * Inline extension for wiki links.
 *
 * The accepted syntax is whatever `pWikiLinkRegex` matches at the current
 * position; the parser only looks when the current character is `[` or `!`.
 * The regex is expected to yield, in order: the opening mark, the page name,
 * an optional alias and the closing mark.
 *
 * Resulting tree: `WikiLink(WikiLinkMark, WikiLinkPage, [WikiLinkMark,
 * WikiLinkAlias], WikiLinkMark)`. When the match starts with `!`, the
 * `WikiLink` node is additionally wrapped in an `Image` node.
 */
const WikiLink: MarkdownConfig = {
  defineNodes: [
    { name: "WikiLink", style: ct.WikiLinkTag },
    { name: "WikiLinkPage", style: ct.WikiLinkPageTag },
    { name: "WikiLinkAlias", style: ct.WikiLinkPageTag },
    { name: "WikiLinkDimensions", style: ct.WikiLinkPageTag },
    { name: "WikiLinkMark", style: t.processingInstruction },
  ],
  parseInline: [
    {
      name: "WikiLink",
      after: "Emphasis",
      parse(cx, next, pos) {
        const startsWithBang = next == 33 /* '!' */;
        if (!startsWithBang && next != 91 /* '[' */) {
          return -1;
        }
        const match = pWikiLinkRegex.exec(cx.slice(pos, cx.end));
        if (!match) {
          return -1;
        }

        const [fullMatch, firstMark, page, alias] = match;
        const pageFrom = pos + firstMark.length;
        const pageTo = pageFrom + page.length;
        const endPos = pos + fullMatch.length;

        // Children are collected in document order.
        const children = [
          cx.elt("WikiLinkMark", pos, pageFrom),
          cx.elt("WikiLinkPage", pageFrom, pageTo),
        ];
        if (alias) {
          // One separator character sits between the page and the alias.
          children.push(
            cx.elt("WikiLinkMark", pageTo, pageTo + 1),
            cx.elt("WikiLinkAlias", pageTo + 1, pageTo + 1 + alias.length),
          );
        }
        // The closing mark is always the last two characters of the match.
        children.push(cx.elt("WikiLinkMark", endPos - 2, endPos));

        const link = cx.elt("WikiLink", pos, endPos, children);
        // Inline image form: wrap the link in an Image node.
        const result = startsWithBang
          ? cx.elt("Image", pos, endPos, [link])
          : link;
        return cx.addElement(result);
      },
    },
  ],
};
/**
 * Inline extension for command links.
 *
 * Only attempted when the current character is `{`; the actual syntax is
 * defined by `commandLinkRegex`, which is expected to capture the command
 * name, an optional alias part (separator + label) and an optional argument
 * part (two-character opener + args).
 *
 * Resulting tree: `CommandLink(CommandLinkMark, CommandLinkName,
 * [CommandLinkMark, CommandLinkAlias], [CommandLinkMark, CommandLinkArgs],
 * CommandLinkMark)`.
 */
const CommandLink: MarkdownConfig = {
  defineNodes: [
    { name: "CommandLink", style: { "CommandLink/...": ct.CommandLinkTag } },
    { name: "CommandLinkName", style: ct.CommandLinkNameTag },
    { name: "CommandLinkAlias", style: ct.CommandLinkNameTag },
    { name: "CommandLinkArgs", style: ct.CommandLinkArgsTag },
    { name: "CommandLinkMark", style: t.processingInstruction },
  ],
  parseInline: [
    {
      name: "CommandLink",
      after: "Emphasis",
      parse(cx, next, pos) {
        if (next != 123 /* '{' */) {
          return -1;
        }
        const match = commandLinkRegex.exec(cx.slice(pos, cx.end));
        if (!match) {
          return -1;
        }

        const [fullMatch, command, pipePart, label, argsPart, args] = match;
        const endPos = pos + fullMatch.length;
        // The opening mark is two characters wide.
        const nameFrom = pos + 2;
        const nameTo = nameFrom + command.length;

        const children = [
          cx.elt("CommandLinkMark", pos, nameFrom),
          cx.elt("CommandLinkName", nameFrom, nameTo),
        ];

        if (pipePart) {
          // Single separator character, then the label.
          children.push(
            cx.elt("CommandLinkMark", nameTo, nameTo + 1),
            cx.elt("CommandLinkAlias", nameTo + 1, nameTo + 1 + label.length),
          );
        }

        if (argsPart) {
          // Arguments start after the name and the (optional) alias part.
          const argsMarkFrom = nameTo + (pipePart?.length ?? 0);
          const argsFrom = argsMarkFrom + 2;
          children.push(
            cx.elt("CommandLinkMark", argsMarkFrom, argsFrom),
            cx.elt("CommandLinkArgs", argsFrom, argsFrom + args.length),
          );
        }

        // Closing mark: the final two characters of the match.
        children.push(cx.elt("CommandLinkMark", endPos - 2, endPos));

        return cx.addElement(cx.elt("CommandLink", pos, endPos, children));
      },
    },
  ],
};
/**
 * Inline extension for `{{ ... }}` template directives.
 *
 * The directive body is classified, in this order, as:
 *  1. `#let @var = expr`   -> `TemplateLetStartDirective`
 *  2. `#each @var in expr` -> `TemplateEachVarStartDirective`
 *  3. `#if expr` / `#each expr` -> `TemplateIfStartDirective` /
 *     `TemplateEachStartDirective`
 *  4. `/if`, `/each`, `/let` -> `Template{If,Each,Let}EndDirective`
 *  5. anything else        -> `TemplateExpressionDirective`
 *
 * Expressions are parsed with `highlightingExpressionParser` and the
 * resulting tree is embedded at the matching document offset.
 *
 * Resulting tree: `TemplateDirective(TemplateDirectiveMark, <body>,
 * TemplateDirectiveMark)`.
 */
const TemplateDirective: MarkdownConfig = {
  defineNodes: [
    { name: "TemplateDirective" },
    { name: "TemplateExpressionDirective" },
    { name: "TemplateIfStartDirective", style: ct.DirectiveTag },
    { name: "TemplateEachStartDirective", style: ct.DirectiveTag },
    { name: "TemplateEachVarStartDirective", style: ct.DirectiveTag },
    { name: "TemplateLetStartDirective", style: ct.DirectiveTag },
    { name: "TemplateIfEndDirective", style: ct.DirectiveTag },
    { name: "TemplateEachEndDirective", style: ct.DirectiveTag },
    { name: "TemplateLetEndDirective", style: ct.DirectiveTag },
    { name: "TemplateVar", style: t.variableName },
    { name: "TemplateDirectiveMark", style: ct.DirectiveMarkTag },
  ],
  parseInline: [
    {
      name: "TemplateDirective",
      parse(cx, next, pos) {
        // Reject cheaply before copying any text: this callback is invoked
        // for every inline position, so slicing the remainder of the text up
        // front would make inline parsing quadratic.
        if (next !== 123 /* '{' */ || cx.slice(pos, pos + 2) !== "{{") {
          return -1;
        }
        const textFromPos = cx.slice(pos, cx.end);

        // Find the `}` that balances the opening `{{`, honouring nested
        // `{ ... }` pairs inside the body.
        let closeIndex = -1;
        for (let i = 0, depth = 0; i < textFromPos.length; i++) {
          const ch = textFromPos[i];
          if (ch === "{") {
            depth++;
          } else if (ch === "}" && --depth === 0) {
            closeIndex = i;
            break;
          }
        }
        if (closeIndex === -1) {
          // Unbalanced braces: not a directive.
          return -1;
        }
        // The directive must be terminated by a literal `}}`. Without this
        // check, input such as `{{foo} bar}` would be accepted with a
        // truncated body and a closing mark covering a non-brace character.
        if (textFromPos[closeIndex - 1] !== "}") {
          return -1;
        }

        const bodyText = textFromPos.slice(2, closeIndex - 1);
        const endPos = pos + closeIndex + 1;
        const bodyFrom = pos + 2;
        const bodyTo = endPos - 2;

        // Builds a `<directive> @var <sep> expr` node (used by #let and the
        // variable form of #each).
        const varDirective = (nodeName: string, m: RegExpExecArray) => {
          const [, directiveStart, varName, separator, expr] = m;
          const varFrom = bodyFrom + directiveStart.length;
          const varTo = varFrom + varName.length;
          const parsedExpression = highlightingExpressionParser.parse(expr);
          return cx.elt(nodeName, bodyFrom, bodyTo, [
            cx.elt("TemplateVar", varFrom, varTo),
            cx.elt(parsedExpression, varTo + separator.length),
          ]);
        };

        // Classifies the body; the order of the checks matters because the
        // generic `#each expr` pattern would also match `#each @v in expr`.
        const buildBody = () => {
          // #let @var = expr
          const letMatch = /^(\s*#let\s*)(@\w+)(\s*=\s*)(.+)$/s.exec(bodyText);
          if (letMatch) {
            return varDirective("TemplateLetStartDirective", letMatch);
          }

          // #each @var in expr
          const eachVarMatch = /^(\s*#each\s*)(@\w+)(\s+in\s+)(.+)$/s.exec(
            bodyText,
          );
          if (eachVarMatch) {
            return varDirective("TemplateEachVarStartDirective", eachVarMatch);
          }

          // #if expr / #each expr
          const openBlockMatch = /^(\s*#(if|each)\s*)(.+)$/s.exec(bodyText);
          if (openBlockMatch) {
            const [, directiveStart, directiveType, directiveBody] =
              openBlockMatch;
            const parsedExpression = highlightingExpressionParser.parse(
              directiveBody,
            );
            return cx.elt(
              directiveType === "if"
                ? "TemplateIfStartDirective"
                : "TemplateEachStartDirective",
              bodyFrom,
              bodyTo,
              [cx.elt(parsedExpression, bodyFrom + directiveStart.length)],
            );
          }

          // /if, /each, /let
          const closeBlockMatch = /^\s*\/(if|each|let)/.exec(bodyText);
          if (closeBlockMatch) {
            const [, directiveType] = closeBlockMatch;
            const upCaseDirectiveType = directiveType[0].toUpperCase() +
              directiveType.slice(1);
            return cx.elt(
              `Template${upCaseDirectiveType}EndDirective`,
              bodyFrom,
              bodyTo,
            );
          }

          // Fallback: treat the whole body as an expression.
          const parsedExpression = highlightingExpressionParser.parse(bodyText);
          return cx.elt("TemplateExpressionDirective", bodyFrom, bodyTo, [
            cx.elt(parsedExpression, bodyFrom),
          ]);
        };

        return cx.addElement(
          cx.elt("TemplateDirective", pos, endPos, [
            cx.elt("TemplateDirectiveMark", pos, bodyFrom),
            buildBody(),
            cx.elt("TemplateDirectiveMark", bodyTo, endPos),
          ]),
        );
      },
      after: "Emphasis",
    },
  ],
};
/**
 * Inline extension for `${ ... }` Lua expression directives.
 *
 * The body between `${` and the balancing `}` is wrapped as `_(<body>)` and
 * parsed with `luaLanguage.parser`; the sub-tree for the wrapped expression
 * is then embedded at the body's document offset.
 *
 * Resulting tree: `LuaDirective(LuaDirectiveMark, LuaExpressionDirective,
 * LuaDirectiveMark)`.
 */
const LuaDirectives: MarkdownConfig = {
  defineNodes: [
    { name: "LuaDirective" },
    { name: "LuaExpressionDirective" },
    { name: "LuaDirectiveMark", style: ct.DirectiveMarkTag },
  ],
  parseInline: [
    {
      name: "LuaDirective",
      parse(cx, next, pos) {
        // Reject cheaply before copying any text: this callback runs for
        // every inline position, so slicing the remainder of the text up
        // front would make inline parsing quadratic.
        if (next !== 36 /* '$' */ || cx.slice(pos, pos + 2) !== "${") {
          return -1;
        }
        const textFromPos = cx.slice(pos, cx.end);

        // Find the `}` that balances the opening `{`, honouring nested
        // `{ ... }` pairs (e.g. Lua table constructors) inside the body.
        let closeIndex = -1;
        for (let i = 0, depth = 0; i < textFromPos.length; i++) {
          const ch = textFromPos[i];
          if (ch === "{") {
            depth++;
          } else if (ch === "}" && --depth === 0) {
            closeIndex = i;
            break;
          }
        }
        if (closeIndex === -1) {
          // Unbalanced braces: not a directive.
          return -1;
        }

        const bodyText = textFromPos.slice(2, closeIndex);
        const endPos = pos + closeIndex + 1;
        const bodyFrom = pos + 2;
        const bodyTo = endPos - 1;

        // Parse the body as the argument of a dummy call `_(...)`, then walk
        // from the node at offset 2 to its third child.
        // NOTE(review): presumably that child is the argument expression of
        // the call — confirm against the Lua grammar.
        const parsedExpression = luaLanguage.parser.parse(`_(${bodyText})`);
        const node = parsedExpression.resolveInner(2, 0).firstChild?.nextSibling
          ?.nextSibling;
        if (!node) {
          return -1;
        }

        return cx.addElement(
          cx.elt("LuaDirective", pos, endPos, [
            cx.elt("LuaDirectiveMark", pos, bodyFrom),
            cx.elt("LuaExpressionDirective", bodyFrom, bodyTo, [
              cx.elt(node.toTree()!, bodyFrom),
            ]),
            cx.elt("LuaDirectiveMark", bodyTo, endPos),
          ]),
        );
      },
      after: "Emphasis",
    },
  ],
};
/** Delimiter type registered for `==`; resolves to a `Highlight` node. */
const HighlightDelim = { resolve: "Highlight", mark: "HighlightMark" };

/**
 * Inline extension for `==highlighted==` text. Every `==` is registered as
 * a delimiter that may both open and close a `Highlight` span.
 */
export const Highlight: MarkdownConfig = {
  defineNodes: [
    { name: "Highlight", style: { "Highlight/...": ct.Highlight } },
    { name: "HighlightMark", style: t.processingInstruction },
  ],
  parseInline: [
    {
      name: "Highlight",
      after: "Emphasis",
      parse(cx, next, pos) {
        const EQUALS = 61; /* '=' */
        const isDoubleEquals = next == EQUALS && cx.char(pos + 1) == EQUALS;
        if (!isDoubleEquals) {
          return -1;
        }
        return cx.addDelimiter(HighlightDelim, pos, pos + 2, true, true);
      },
    },
  ],
};
import { parser as queryParser } from "./parse-query.js";
// Highlighting tags for nodes produced by the query/expression grammars.
// The last entry is a space-separated selector list that tags the listed
// keyword nodes (and everything under `Order`) as keywords.
const expressionStyleTags = styleTags({
Identifier: t.variableName,
TagIdentifier: t.variableName,
GlobalIdentifier: t.variableName,
String: t.string,
Number: t.number,
PageRef: ct.WikiLinkTag,
BinExpression: t.operator,
TernaryExpression: t.operator,
Regex: t.regexp,
"where limit select render desc asc and or null as in true false not each all Order/...":
t.keyword,
});
/** The query parser configured with the expression highlighting tags above. */
export const highlightingQueryParser = queryParser.configure({
props: [
expressionStyleTags,
],
});
import { parser as expressionParser } from "./parse-expression.js";
/** The expression parser configured with `expressionStyleTags` as a node prop source. */
export const highlightingExpressionParser = expressionParser.configure({
props: [expressionStyleTags],
});
/**
 * Matches the start of an inline attribute: `[`, a name made of word
 * characters and/or `$` (group 1), then one or two colons followed by
 * optional whitespace (group 2). The value and closing bracket are not part
 * of the match.
 */
export const attributeStartRegex = /^\[([\w\$]+)(::?\s*)/;
2023-07-25 01:54:31 +08:00
/**
 * Inline extension for attributes of the form `[name: value]`
 * (or `[name:: value]`).
 *
 * The start is recognised with `attributeStartRegex`; the value then runs up
 * to the `]` that balances the opening bracket, so nested `[...]` pairs are
 * allowed inside the value. If the closing bracket is directly followed by
 * `(`, the construct is a regular markdown link and the parser backs out.
 *
 * Resulting tree: `Attribute(AttributeMark, AttributeName, AttributeColon,
 * AttributeValue, AttributeMark)`.
 */
export const Attribute: MarkdownConfig = {
  defineNodes: [
    { name: "Attribute", style: { "Attribute/...": ct.AttributeTag } },
    { name: "AttributeName", style: ct.AttributeNameTag },
    { name: "AttributeValue", style: ct.AttributeValueTag },
    { name: "AttributeMark", style: t.processingInstruction },
    { name: "AttributeColon", style: t.processingInstruction },
  ],
  parseInline: [
    {
      name: "Attribute",
      parse(cx, next, pos) {
        // Reject cheaply before copying any text: this callback runs for
        // every inline position, so slicing the remainder of the text up
        // front would make inline parsing quadratic.
        if (next !== 91 /* '[' */) {
          return -1;
        }
        const textFromPos = cx.slice(pos, cx.end);
        const match = attributeStartRegex.exec(textFromPos);
        if (!match) {
          return -1;
        }
        const [fullMatch, attributeName, attributeColon] = match;

        // Scan for the `]` that balances the opening `[` (depth starts at 1
        // because the opening bracket is part of `fullMatch`).
        let closeIndex = -1;
        for (let i = fullMatch.length, depth = 1; i < textFromPos.length; i++) {
          const ch = textFromPos[i];
          if (ch === "[") {
            depth++;
          } else if (ch === "]" && --depth === 0) {
            closeIndex = i;
            break;
          }
        }
        if (closeIndex === -1) {
          console.log("Failed to parse attribute", fullMatch, textFromPos);
          return -1;
        }
        if (textFromPos[closeIndex + 1] === "(") {
          // This turns out to be a link, back out!
          return -1;
        }

        const nameFrom = pos + 1;
        const nameTo = nameFrom + attributeName.length;
        const valueFrom = nameTo + attributeColon.length;
        const valueTo = pos + closeIndex;

        return cx.addElement(
          cx.elt("Attribute", pos, valueTo + 1, [
            cx.elt("AttributeMark", pos, nameFrom), // [
            cx.elt("AttributeName", nameFrom, nameTo),
            cx.elt("AttributeColon", nameTo, valueFrom),
            cx.elt("AttributeValue", valueFrom, valueTo),
            cx.elt("AttributeMark", valueTo, valueTo + 1), // ]
          ]),
        );
      },
      after: "Emphasis",
    },
  ],
};
/**
 * Leaf-block parser that turns a single leaf into a `Comment` element.
 * The first three characters of the leaf (the comment marker) are skipped
 * and the remainder is parsed as inline content.
 */
class CommentParser implements LeafBlockParser {
  /** Never consumes additional lines. */
  nextLine(): boolean {
    return false;
  }

  /** Emits the `Comment` element covering the whole leaf content. */
  finish(cx: BlockContext, leaf: LeafBlock): boolean {
    const markerLength = 3;
    const from = leaf.start;
    const to = from + leaf.content.length;
    const inlineChildren = cx.parser.parseInline(
      leaf.content.slice(markerLength),
      from + markerLength,
    );
    const commentElement = cx.elt("Comment", from, to, [...inlineChildren]);
    cx.addLeafElement(leaf, commentElement);
    return true;
  }
}
/**
 * Block extension for comment paragraphs: a leaf block whose content starts
 * with `%%` followed by a whitespace character is handed to `CommentParser`.
 */
export const Comment: MarkdownConfig = {
  defineNodes: [{ name: "Comment", block: true }],
  parseBlock: [
    {
      name: "Comment",
      after: "SetextHeading",
      leaf(_cx, leaf) {
        const startsWithMarker = /^%%\s/.test(leaf.content);
        if (!startsWithMarker) {
          return null;
        }
        return new CommentParser();
      },
    },
  ],
};
2024-01-24 20:34:12 +08:00
/** Description of a simple, regex-driven inline node. */
type RegexParserExtension = {
  // Char code (`.charCodeAt(0)`) of the first character, used as a cheap
  // pre-check before running the regex.
  firstCharCode: number;
  // Pattern applied to the text starting at the current position.
  regex: RegExp;
  // Name of the node to define and emit.
  nodeType: string;
  tag: Tag;
  className?: string;
};

/**
 * Builds a markdown extension that emits a `nodeType` node wherever the
 * current character equals `firstCharCode` and `regex` matches the text
 * from the current position to the end of the inline section. The node
 * spans the full regex match.
 */
function regexParser(extension: RegexParserExtension): MarkdownConfig {
  const { regex, firstCharCode, nodeType } = extension;
  return {
    defineNodes: [nodeType],
    parseInline: [
      {
        name: nodeType,
        parse(cx, next, pos) {
          // Only run the regex when the first character already fits.
          const match = next === firstCharCode
            ? regex.exec(cx.slice(pos, cx.end))
            : null;
          return match
            ? cx.addElement(cx.elt(nodeType, pos, pos + match[0].length))
            : -1;
        },
      },
    ],
  };
}
// Bare http(s) URLs that are not wrapped in link syntax.
const NakedURL = regexParser({
  nodeType: "NakedURL",
  className: "sb-naked-url",
  tag: NakedURLTag,
  firstCharCode: 104, // 'h'
  regex:
    /(^https?:\/\/([-a-zA-Z0-9@:%_\+~#=]|(?:[.](?!(\s|$)))){1,256})(([-a-zA-Z0-9(@:%_\+~#?&=\/]|(?:[.,:;)](?!(\s|$))))*)/,
});

// Hashtags, as defined by `tagRegex` anchored to the current position.
const Hashtag = regexParser({
  nodeType: "Hashtag",
  className: "sb-hashtag-text",
  tag: ct.HashtagTag,
  firstCharCode: 35, // '#'
  regex: new RegExp("^" + tagRegex.source),
});

// Task deadlines: the calendar emoji followed by a YYYY-MM-DD date.
const TaskDeadline = regexParser({
  nodeType: "DeadlineDate",
  className: "sb-task-deadline",
  tag: ct.TaskDeadlineTag,
  firstCharCode: 55357, // first UTF-16 code unit of 📅
  regex: /^📅\s*\d{4}\-\d{2}\-\d{2}/,
});

// Named anchors: `$` followed by an anchor name.
const NamedAnchor = regexParser({
  nodeType: "NamedAnchor",
  className: "sb-named-anchor",
  tag: ct.NamedAnchorTag,
  firstCharCode: 36, // '$'
  regex: /^\$[a-zA-Z\.\-\/]+[\w\.\-\/]*/,
});
2023-01-13 23:59:28 +08:00
import { Table } from "./table_parser.ts";
2023-12-17 21:26:41 +08:00
import { foldNodeProp } from "@codemirror/language";
import { pWikiLinkRegex, tagRegex } from "$common/markdown_parser/constants.ts";
2024-08-22 02:13:40 +08:00
import { parse } from "$common/markdown_parser/parse_tree.ts";
import type { ParseTree } from "@silverbulletmd/silverbullet/lib/tree";
import { luaLanguage } from "$common/space_lua/parse.ts";
// FrontMatter parser
// YAML stream language used to parse (and thereby highlight) the front
// matter body.
const yamlLang = StreamLanguage.define(yamlLanguage);
/**
 * Block extension for a YAML front matter section.
 *
 * Only attempted when the parser is at position 0 and the current line is
 * exactly `---`. Lines are then collected until another line that is
 * exactly `---`; the collected text is parsed as YAML and embedded in a
 * `FrontMatterCode` node, surrounded by two `FrontMatterMarker` nodes, all
 * wrapped in a `FrontMatter` block.
 */
export const FrontMatter: MarkdownConfig = {
defineNodes: [
{ name: "FrontMatter", block: true },
{ name: "FrontMatterMarker" },
{ name: "FrontMatterCode" },
],
parseBlock: [{
name: "FrontMatter",
parse: (cx, line: Line) => {
// Front matter is only recognised at the very start of the document.
if (cx.parsedPos !== 0) {
return false;
}
if (line.text !== "---") {
return false;
}
const frontStart = cx.parsedPos;
// Opening marker; the extra +1 extends the range one character past the
// line text (presumably the line break — confirm).
const elts = [
cx.elt(
"FrontMatterMarker",
cx.parsedPos,
cx.parsedPos + line.text.length + 1,
),
];
cx.nextLine();
const startPos = cx.parsedPos;
let endPos = startPos;
let text = "";
let lastPos = cx.parsedPos;
// NOTE(review): the loop condition re-reads `line.text` without any
// reassignment here, so it relies on `cx.nextLine()` updating `line` in
// place — confirm against the @lezer/markdown BlockContext contract.
// NOTE(review): because this is a do/while, the first line after the
// opening marker is always added to the body before the terminator check,
// so an immediately following `---` (empty front matter) is treated as
// content rather than as the closing marker.
do {
text += line.text + "\n";
endPos += line.text.length + 1;
cx.nextLine();
if (cx.parsedPos === lastPos) {
// End of file, no progress made, there may be a better way to do this but :shrug:
return false;
}
lastPos = cx.parsedPos;
} while (line.text !== "---");
// Parse the collected body as YAML and embed the tree at its offset.
const yamlTree = yamlLang.parser.parse(text);
elts.push(
cx.elt("FrontMatterCode", startPos, endPos, [
cx.elt(yamlTree, startPos),
]),
);
// From here on `endPos` is the end of the closing marker line, i.e. the
// end of the whole FrontMatter block.
endPos = cx.parsedPos + line.text.length;
elts.push(cx.elt(
"FrontMatterMarker",
cx.parsedPos,
cx.parsedPos + line.text.length,
));
// Step past the closing marker before emitting the block.
cx.nextLine();
cx.addElement(cx.elt("FrontMatter", frontStart, endPos, elts));
return true;
},
// Must run before HorizontalRule, which would otherwise claim `---`.
before: "HorizontalRule",
}],
};
2024-01-24 20:34:12 +08:00
/**
 * The markdown language used by the editor: the base markdown language plus
 * all custom inline/block extensions defined in this module, together with
 * folding rules and highlighting tags for the resulting nodes.
 */
export const extendedMarkdownLanguage = markdown({
  extensions: [
    WikiLink,
    CommandLink,
    Attribute,
    FrontMatter,
    TaskList,
    Comment,
    Highlight,
    TemplateDirective,
    LuaDirectives,
    Strikethrough,
    Table,
    NakedURL,
    Hashtag,
    TaskDeadline,
    NamedAnchor,
    Superscript,
    Subscript,
    {
      props: [
        foldNodeProp.add({
          // Lists themselves are not foldable...
          BulletList: () => null,
          OrderedList: () => null,
          // ...but each list item folds from the end of its first line.
          ListItem: (node, state) => {
            const firstLineEnd = state.doc.lineAt(node.from).to;
            return { from: firstLineEnd, to: node.to };
          },
          // Front matter folds as a whole.
          FrontMatter: (node) => {
            return { from: node.from, to: node.to };
          },
        }),
        styleTags({
          Task: ct.TaskTag,
          TaskMark: ct.TaskMarkTag,
          Comment: ct.CommentTag,
          Subscript: ct.SubscriptTag,
          Superscript: ct.SuperscriptTag,
          "TableDelimiter StrikethroughMark": t.processingInstruction,
          "TableHeader/...": t.heading,
          TableCell: t.content,
          CodeInfo: ct.CodeInfoTag,
          HorizontalRule: ct.HorizontalRuleTag,
          Hashtag: ct.HashtagTag,
          NakedURL: ct.NakedURLTag,
          DeadlineDate: ct.TaskDeadlineTag,
          NamedAnchor: ct.NamedAnchorTag,
        }),
      ],
    },
  ],
}).language;
2024-08-22 02:13:40 +08:00
/**
 * Parses `text` with `extendedMarkdownLanguage`.
 *
 * @param text The markdown source to parse.
 * @returns The `ParseTree` produced by `parse` for the given text.
 */
export function parseMarkdown(text: string): ParseTree {
  const tree: ParseTree = parse(extendedMarkdownLanguage, text);
  return tree;
}