silverbullet/common/markdown_parser/parser.ts

478 lines
13 KiB
TypeScript
Raw Normal View History

2022-04-25 16:33:38 +08:00
import {
BlockContext,
Language,
2022-04-25 16:33:38 +08:00
LeafBlock,
LeafBlockParser,
Line,
markdown,
2022-04-25 16:33:38 +08:00
MarkdownConfig,
StreamLanguage,
2022-12-05 20:05:48 +08:00
Strikethrough,
styleTags,
tags as t,
yamlLanguage,
} from "../deps.ts";
import * as ct from "./customtags.ts";
import { TaskList } from "./extended_task.ts";
2022-04-25 16:33:38 +08:00
import {
MDExt,
mdExtensionStyleTags,
mdExtensionSyntaxConfig,
} from "./markdown_ext.ts";
2022-11-27 15:48:01 +08:00
export const pageLinkRegex = /^\[\[([^\]\|]+)(\|([^\]]+))?\]\]/;
const WikiLink: MarkdownConfig = {
2022-11-29 16:11:23 +08:00
defineNodes: [
{ name: "WikiLink", style: ct.WikiLinkTag },
{ name: "WikiLinkPage", style: ct.WikiLinkPageTag },
{ name: "WikiLinkAlias", style: ct.WikiLinkPageTag },
{ name: "WikiLinkMark", style: t.processingInstruction },
],
parseInline: [
{
name: "WikiLink",
parse(cx, next, pos) {
let match: RegExpMatchArray | null;
if (
next != 91 /* '[' */ ||
2022-04-10 17:04:07 +08:00
!(match = pageLinkRegex.exec(cx.slice(pos, cx.end)))
) {
return -1;
}
2022-11-29 16:11:23 +08:00
const [fullMatch, page, pipePart, label] = match;
const endPos = pos + fullMatch.length;
2022-11-27 15:48:01 +08:00
let aliasElts: any[] = [];
if (pipePart) {
const pipeStartPos = pos + 2 + page.length;
aliasElts = [
cx.elt("WikiLinkMark", pipeStartPos, pipeStartPos + 1),
cx.elt(
"WikiLinkAlias",
pipeStartPos + 1,
pipeStartPos + 1 + label.length,
),
];
}
return cx.addElement(
cx.elt("WikiLink", pos, endPos, [
cx.elt("WikiLinkMark", pos, pos + 2),
2022-11-27 15:48:01 +08:00
cx.elt("WikiLinkPage", pos + 2, pos + 2 + page.length),
...aliasElts,
cx.elt("WikiLinkMark", endPos - 2, endPos),
]),
);
},
after: "Emphasis",
},
],
};
export const commandLinkRegex = /^\{\[([^\]\|]+)(\|([^\]]+))?\](\(([^\)]+)\))?\}/;
const CommandLink: MarkdownConfig = {
defineNodes: [
{ name: "CommandLink", style: { "CommandLink/...": ct.CommandLinkTag } },
{ name: "CommandLinkName", style: ct.CommandLinkNameTag },
2022-11-29 16:11:23 +08:00
{ name: "CommandLinkAlias", style: ct.CommandLinkNameTag },
{ name: "CommandLinkArgs", style: ct.CommandLinkArgsTag },
2022-11-29 16:11:23 +08:00
{ name: "CommandLinkMark", style: t.processingInstruction },
],
parseInline: [
{
name: "CommandLink",
parse(cx, next, pos) {
let match: RegExpMatchArray | null;
if (
next != 123 /* '{' */ ||
!(match = commandLinkRegex.exec(cx.slice(pos, cx.end)))
) {
return -1;
}
const [fullMatch, command, pipePart, label, argsPart, args] = match;
2022-11-29 16:11:23 +08:00
const endPos = pos + fullMatch.length;
let aliasElts: any[] = [];
if (pipePart) {
const pipeStartPos = pos + 2 + command.length;
aliasElts = [
cx.elt("CommandLinkMark", pipeStartPos, pipeStartPos + 1),
cx.elt(
"CommandLinkAlias",
pipeStartPos + 1,
pipeStartPos + 1 + label.length,
),
];
}
let argsElts: any[] = [];
if (argsPart) {
const argsStartPos = pos + 2 + command.length + (pipePart?.length ?? 0);
argsElts = [
cx.elt("CommandLinkMark", argsStartPos, argsStartPos + 2),
cx.elt(
"CommandLinkArgs",
argsStartPos + 2,
argsStartPos + 2 + args.length,
),
];
}
return cx.addElement(
cx.elt("CommandLink", pos, endPos, [
cx.elt("CommandLinkMark", pos, pos + 2),
2022-11-29 16:11:23 +08:00
cx.elt("CommandLinkName", pos + 2, pos + 2 + command.length),
...aliasElts,
...argsElts,
cx.elt("CommandLinkMark", endPos - 2, endPos),
]),
);
},
after: "Emphasis",
},
],
};
const HighlightDelim = { resolve: "Highlight", mark: "HighlightMark" };
2022-12-05 20:05:48 +08:00
export const Highlight: MarkdownConfig = {
defineNodes: [
{
name: "Highlight",
style: { "Highlight/...": ct.Highlight },
},
{
name: "HighlightMark",
style: t.processingInstruction,
},
],
parseInline: [
{
name: "Highlight",
parse(cx, next, pos) {
if (next != 61 /* '=' */ || cx.char(pos + 1) != 61) return -1;
return cx.addDelimiter(HighlightDelim, pos, pos + 2, true, true);
},
after: "Emphasis",
},
],
};
export const attributeStartRegex = /^\[([\w\$]+)(::?\s*)/;
2023-07-25 01:54:31 +08:00
export const Attribute: MarkdownConfig = {
defineNodes: [
{ name: "Attribute", style: { "Attribute/...": ct.AttributeTag } },
{ name: "AttributeName", style: ct.AttributeNameTag },
{ name: "AttributeValue", style: ct.AttributeValueTag },
{ name: "AttributeMark", style: t.processingInstruction },
{ name: "AttributeColon", style: t.processingInstruction },
],
parseInline: [
{
name: "Attribute",
parse(cx, next, pos) {
let match: RegExpMatchArray | null;
2023-07-26 23:12:56 +08:00
const textFromPos = cx.slice(pos, cx.end);
2023-07-25 01:54:31 +08:00
if (
next != 91 /* '[' */ ||
// and match the whole thing
2023-07-26 23:12:56 +08:00
!(match = attributeStartRegex.exec(textFromPos))
2023-07-25 01:54:31 +08:00
) {
return -1;
}
2023-07-26 23:12:56 +08:00
const [fullMatch, attributeName, attributeColon] = match;
let bracketNestingDepth = 1;
let valueLength = fullMatch.length;
loopLabel:
for (; valueLength < textFromPos.length; valueLength++) {
switch (textFromPos[valueLength]) {
case "[":
bracketNestingDepth++;
break;
case "]":
bracketNestingDepth--;
if (bracketNestingDepth === 0) {
// Done!
break loopLabel;
}
break;
}
}
if (bracketNestingDepth !== 0) {
console.log("Failed to parse attribute", fullMatch, textFromPos);
return -1;
}
if (textFromPos[valueLength + 1] === "(") {
console.log("Link", fullMatch, textFromPos);
// This turns out to be a link, back out!
return -1;
}
2023-07-25 01:54:31 +08:00
return cx.addElement(
2023-07-26 23:12:56 +08:00
cx.elt("Attribute", pos, pos + valueLength + 1, [
2023-07-25 01:54:31 +08:00
cx.elt("AttributeMark", pos, pos + 1), // [
cx.elt("AttributeName", pos + 1, pos + 1 + attributeName.length),
cx.elt(
"AttributeColon",
pos + 1 + attributeName.length,
pos + 1 + attributeName.length + attributeColon.length,
),
cx.elt(
"AttributeValue",
pos + 1 + attributeName.length + attributeColon.length,
2023-07-26 23:12:56 +08:00
pos + valueLength,
2023-07-25 01:54:31 +08:00
),
2023-07-26 23:12:56 +08:00
cx.elt("AttributeMark", pos + valueLength, pos + valueLength + 1), // [
2023-07-25 01:54:31 +08:00
]),
);
},
after: "Emphasis",
},
],
};
class CommentParser implements LeafBlockParser {
nextLine() {
return false;
}
finish(cx: BlockContext, leaf: LeafBlock) {
cx.addLeafElement(
leaf,
cx.elt("Comment", leaf.start, leaf.start + leaf.content.length, [
// cx.elt("CommentMarker", leaf.start, leaf.start + 3),
...cx.parser.parseInline(leaf.content.slice(3), leaf.start + 3),
]),
);
return true;
}
}
export const Comment: MarkdownConfig = {
defineNodes: [{ name: "Comment", block: true }],
parseBlock: [
{
name: "Comment",
leaf(_cx, leaf) {
return /^%%\s/.test(leaf.content) ? new CommentParser() : null;
},
after: "SetextHeading",
},
],
};
// Directive parser
const directiveStart = /^\s*<!--\s*#([a-z]+)\s*(.*?)-->\s*/;
const directiveEnd = /^\s*<!--\s*\/(.*?)-->\s*/;
import { parser as directiveParser } from "./parse-query.js";
import { parser as expressionParser } from "./parse-expression.js";
2023-01-13 23:59:28 +08:00
import { Table } from "./table_parser.ts";
export const highlightingDirectiveParser = directiveParser.configure({
props: [
styleTags({
"Name": t.variableName,
2022-12-19 20:42:20 +08:00
"String": t.string,
"Number": t.number,
2022-12-19 20:42:20 +08:00
"PageRef": ct.WikiLinkTag,
2023-10-30 21:15:12 +08:00
"where limit select render Order OrderKW and or as InKW each all":
t.keyword,
}),
],
});
export const Directive: MarkdownConfig = {
defineNodes: [
{ name: "Directive", block: true, style: ct.DirectiveTag },
{ name: "DirectiveStart", style: ct.DirectiveStartTag, block: true },
{ name: "DirectiveEnd", style: ct.DirectiveEndTag },
{ name: "DirectiveBody", block: true },
],
parseBlock: [{
name: "Directive",
parse: (cx, line: Line) => {
const match = directiveStart.exec(line.text);
if (!match) {
return false;
}
// console.log("Parsing directive", line.text);
const frontStart = cx.parsedPos;
const [fullMatch, directive, arg] = match;
const elts = [];
if (directive === "query") {
const queryParseTree = highlightingDirectiveParser.parse(arg);
elts.push(cx.elt(
"DirectiveStart",
cx.parsedPos,
cx.parsedPos + line.text.length + 1,
[cx.elt(queryParseTree, frontStart + fullMatch.indexOf(arg))],
));
} else if (directive === "eval") {
const expressionParseTree = expressionParser.parse(arg);
elts.push(cx.elt(
"DirectiveStart",
cx.parsedPos,
cx.parsedPos + line.text.length + 1,
[cx.elt(expressionParseTree, frontStart + fullMatch.indexOf(arg))],
));
} else {
elts.push(cx.elt(
"DirectiveStart",
cx.parsedPos,
cx.parsedPos + line.text.length + 1,
));
}
// console.log("Query parse tree", queryParseTree.topNode);
cx.nextLine();
const startPos = cx.parsedPos;
let endPos = startPos;
let text = "";
let lastPos = cx.parsedPos;
let nesting = 0;
while (true) {
if (directiveEnd.exec(line.text) && nesting === 0) {
break;
}
text += line.text + "\n";
endPos += line.text.length + 1;
if (directiveStart.exec(line.text)) {
nesting++;
}
if (directiveEnd.exec(line.text)) {
nesting--;
}
cx.nextLine();
if (cx.parsedPos === lastPos) {
// End of file, no progress made, there may be a better way to do this but :shrug:
return false;
}
lastPos = cx.parsedPos;
}
const directiveBodyTree = cx.parser.parse(text);
elts.push(
cx.elt("DirectiveBody", startPos, endPos, [
cx.elt(directiveBodyTree, startPos),
]),
);
endPos = cx.parsedPos + line.text.length;
elts.push(cx.elt(
"DirectiveEnd",
cx.parsedPos,
cx.parsedPos + line.text.length,
));
cx.nextLine();
cx.addElement(cx.elt("Directive", frontStart, endPos, elts));
return true;
},
before: "HTMLBlock",
}],
};
// FrontMatter parser
const yamlLang = StreamLanguage.define(yamlLanguage);
export const FrontMatter: MarkdownConfig = {
defineNodes: [
{ name: "FrontMatter", block: true },
{ name: "FrontMatterMarker" },
{ name: "FrontMatterCode" },
],
parseBlock: [{
name: "FrontMatter",
parse: (cx, line: Line) => {
if (cx.parsedPos !== 0) {
return false;
}
if (line.text !== "---") {
return false;
}
const frontStart = cx.parsedPos;
const elts = [
cx.elt(
"FrontMatterMarker",
cx.parsedPos,
cx.parsedPos + line.text.length + 1,
),
];
cx.nextLine();
const startPos = cx.parsedPos;
let endPos = startPos;
let text = "";
let lastPos = cx.parsedPos;
do {
text += line.text + "\n";
endPos += line.text.length + 1;
cx.nextLine();
if (cx.parsedPos === lastPos) {
// End of file, no progress made, there may be a better way to do this but :shrug:
return false;
}
lastPos = cx.parsedPos;
} while (line.text !== "---");
const yamlTree = yamlLang.parser.parse(text);
elts.push(
cx.elt("FrontMatterCode", startPos, endPos, [
cx.elt(yamlTree, startPos),
]),
);
endPos = cx.parsedPos + line.text.length;
elts.push(cx.elt(
"FrontMatterMarker",
cx.parsedPos,
cx.parsedPos + line.text.length,
));
cx.nextLine();
cx.addElement(cx.elt("FrontMatter", frontStart, endPos, elts));
return true;
},
before: "HorizontalRule",
}],
};
2022-04-12 02:34:09 +08:00
export default function buildMarkdown(mdExtensions: MDExt[]): Language {
2022-09-13 14:41:01 +08:00
return markdown({
extensions: [
2022-04-12 02:34:09 +08:00
WikiLink,
CommandLink,
2023-07-25 01:54:31 +08:00
Attribute,
FrontMatter,
Directive,
2022-04-12 02:34:09 +08:00
TaskList,
Comment,
2022-12-05 20:05:48 +08:00
Highlight,
Strikethrough,
Table,
2022-04-12 02:34:09 +08:00
...mdExtensions.map(mdExtensionSyntaxConfig),
2022-04-12 02:34:09 +08:00
{
props: [
styleTags({
Task: ct.TaskTag,
TaskMark: ct.TaskMarkTag,
2022-04-12 02:34:09 +08:00
Comment: ct.CommentTag,
"TableDelimiter SubscriptMark SuperscriptMark StrikethroughMark":
t.processingInstruction,
"TableHeader/...": t.heading,
TableCell: t.content,
2022-04-25 00:06:34 +08:00
CodeInfo: ct.CodeInfoTag,
2022-08-29 22:48:52 +08:00
HorizontalRule: ct.HorizontalRuleTag,
2022-04-12 02:34:09 +08:00
}),
...mdExtensions.map((mdExt) =>
styleTags(mdExtensionStyleTags(mdExt))
),
],
},
2022-09-13 14:41:01 +08:00
],
}).language;
2022-04-12 02:34:09 +08:00
}