Materialized query parser + data indexing and querying

pull/3/head
Zef Hemel 2022-04-12 13:33:07 +02:00
parent b3c3302970
commit a24eaaf4b4
16 changed files with 249 additions and 133 deletions

View File

@ -11,7 +11,7 @@ syntax:
regex: "@[A-Za-z\\.]+"
styles:
color: blue
URL:
NakedURL:
firstCharacters:
- "h"
regex: "https?:\\/\\/[-a-zA-Z0-9@:%._\\+~#=]{1,256}([-a-zA-Z0-9()@:%_\\+.~#?&=\\/]*)"

View File

@ -3,11 +3,14 @@ import { whiteOutQueries } from "../query/materialized_queries";
import { batchSet } from "plugos-silverbullet-syscall/index";
import { parseMarkdown } from "plugos-silverbullet-syscall/markdown";
import { collectNodesMatching, ParseTree, renderToText } from "../../common/tree";
import { collectNodesOfType, ParseTree, renderToText } from "../../common/tree";
type Item = {
item: string;
export type Item = {
name: string;
nested?: string;
// Not stored in DB
page?: string;
pos?: number;
};
export async function indexItems({ name, text }: IndexEvent) {
@ -17,7 +20,7 @@ export async function indexItems({ name, text }: IndexEvent) {
console.log("Indexing items", name);
let mdTree = await parseMarkdown(text);
let coll = collectNodesMatching(mdTree, (n) => n.type === "ListItem");
let coll = collectNodesOfType(mdTree, "ListItem");
coll.forEach((n) => {
if (!n.children) {
@ -34,7 +37,7 @@ export async function indexItems({ name, text }: IndexEvent) {
}
let item = textNodes.map(renderToText).join("").trim();
let value: Item = {
item,
name: item,
};
if (nested) {
value.nested = nested;

View File

@ -20,6 +20,7 @@ async function actionClickOrActionEnter(mdTree: ParseTree | null) {
await navigateTo(pageLink, +pos);
break;
case "URL":
case "NakedURL":
await openUrl(mdTree.children![0].text!);
break;
case "Link":

File diff suppressed because one or more lines are too long

54
plugs/query/data.ts Normal file
View File

@ -0,0 +1,54 @@
// Index key space:
// data:page@pos
import { IndexEvent } from "../../webapp/app_event";
import { whiteOutQueries } from "./materialized_queries";
import { batchSet } from "plugos-silverbullet-syscall";
import { parseMarkdown } from "plugos-silverbullet-syscall/markdown";
import { collectNodesOfType, findNodeOfType } from "../../common/tree";
import YAML from "yaml";
/**
 * Indexes the YAML objects found in the `data` fenced code blocks of a page.
 *
 * Steps:
 *  1. Materialized query blocks are replaced by same-length whitespace
 *     (`whiteOutQueries`), so the remaining text keeps its offsets.
 *  2. The markdown is parsed and every `FencedCode` node whose `CodeInfo`
 *     text is exactly `data` is selected.
 *  3. The block's `CodeText` is parsed as a (possibly multi-document) YAML
 *     stream; each non-empty, error-free document becomes one entry.
 *  4. All entries are written in one `batchSet` call under keys of the form
 *     `data:<page>@<pos>`.
 *
 * @param event - Index event; `name` is the page name, `text` its content.
 */
export async function indexData({ name, text }: IndexEvent) {
  text = whiteOutQueries(text);
  console.log("Now data indexing", name);
  const mdTree = await parseMarkdown(text);

  const dataObjects: { key: string; value: Object }[] = [];

  collectNodesOfType(mdTree, "FencedCode").forEach((t) => {
    // Only fenced blocks explicitly tagged as ```data are indexed.
    const codeInfoNode = findNodeOfType(t, "CodeInfo");
    if (!codeInfoNode) {
      return;
    }
    if (codeInfoNode.children![0].text !== "data") {
      return;
    }
    const codeTextNode = findNodeOfType(t, "CodeText");
    if (!codeTextNode) {
      // Honestly, this shouldn't happen
      return;
    }
    const codeText = codeTextNode.children![0].text!;
    try {
      // We support multiple YAML documents in one block
      for (const doc of YAML.parseAllDocuments(codeText)) {
        // parseAllDocuments does not throw on malformed YAML; it records the
        // problems on the document instead. Skip such documents rather than
        // indexing a partially parsed object.
        if (doc.errors.length > 0) {
          console.error(
            "Could not parse data",
            codeText,
            "error:",
            doc.errors[0]
          );
          continue;
        }
        if (!doc.contents) {
          continue;
        }
        // Convert once and reuse for both the log line and the stored value.
        const value = doc.contents.toJSON();
        console.log(value);
        dataObjects.push({
          // NOTE(review): doc.range[0] is an offset within codeText while
          // t.from belongs to the FencedCode node, so this position looks like
          // it lands before the document's real start in the page. It is kept
          // as-is because it is part of the stored key; confirm intent.
          key: `data:${name}@${t.from! + doc.range[0]}`,
          value,
        });
      }
    } catch (e) {
      // Guards against unexpected failures while converting a document.
      console.error("Could not parse data", codeText, "error:", e);
    }
  });
  console.log("Found", dataObjects, "data objects");
  await batchSet(name, dataObjects);
}

View File

@ -24,13 +24,13 @@ test("Test parser", () => {
value: "{{today}}",
});
let parsedQuery2 = parseQuery(`page where name like "interview/%"`);
let parsedQuery2 = parseQuery(`page where name =~ /interview\\/.*/"`);
expect(parsedQuery2.table).toBe("page");
expect(parsedQuery2.filter.length).toBe(1);
expect(parsedQuery2.filter[0]).toStrictEqual({
op: "like",
op: "=~",
prop: "name",
value: "interview/%",
value: /interview\/.*/,
});
});
@ -42,29 +42,36 @@ test("Test performing the queries", () => {
{ name: "Angie", age: 28 },
];
expect(applyQuery(`page where name like "interview/%"`, data)).toStrictEqual([
{ name: "interview/My Interview", lastModified: 1 },
{ name: "interview/My Interview 2", lastModified: 2 },
]);
expect(
applyQuery(`page where name like "interview/%" order by lastModified`, data)
applyQuery(parseQuery(`page where name =~ /interview\\/.*/`), data)
).toStrictEqual([
{ name: "interview/My Interview", lastModified: 1 },
{ name: "interview/My Interview 2", lastModified: 2 },
]);
expect(
applyQuery(
`page where name like "interview/%" order by lastModified desc`,
parseQuery(`page where name =~ /interview\\/.*/ order by lastModified`),
data
)
).toStrictEqual([
{ name: "interview/My Interview", lastModified: 1 },
{ name: "interview/My Interview 2", lastModified: 2 },
]);
expect(
applyQuery(
parseQuery(
`page where name =~ /interview\\/.*/ order by lastModified desc`
),
data
)
).toStrictEqual([
{ name: "interview/My Interview 2", lastModified: 2 },
{ name: "interview/My Interview", lastModified: 1 },
]);
expect(applyQuery(`page where age > 30`, data)).toStrictEqual([
expect(applyQuery(parseQuery(`page where age > 30`), data)).toStrictEqual([
{ name: "Pete", age: 38 },
]);
expect(applyQuery(`page where age > 28 and age < 38`, data)).toStrictEqual(
[]
);
expect(
applyQuery(parseQuery(`page where age > 28 and age < 38`), data)
).toStrictEqual([]);
});

View File

@ -31,6 +31,8 @@ export function parseQuery(query: string): ParsedQuery {
}
});
// console.log("Parsed", JSON.stringify(n, null, 2));
let queryNode = n.children![0];
let parsedQuery: ParsedQuery = {
table: queryNode.children![0].children![0].text!,
@ -64,6 +66,10 @@ export function parseQuery(query: string): ParsedQuery {
case "Name":
val = valNode.children![0].text!;
break;
case "Regex":
val = valNode.children![0].text!;
val = new RegExp(val.substring(1, val.length - 1));
break;
case "String":
val = valNode.children![0].text!;
val = val.substring(1, val.length - 1);
@ -80,55 +86,53 @@ export function parseQuery(query: string): ParsedQuery {
return parsedQuery;
}
export function applyQuery(query: string, records: any[]): any {
const parsedQuery = parseQuery(query);
export function applyQuery<T>(parsedQuery: ParsedQuery, records: T[]): T[] {
let resultRecords: any[] = [];
if (parsedQuery.filter.length === 0) {
resultRecords = records.slice();
} else {
recordLoop: for (let record of records) {
const recordAny: any = record;
for (let { op, prop, value } of parsedQuery.filter) {
switch (op) {
case "=":
if (!(record[prop] === value)) {
if (!(recordAny[prop] === value)) {
continue recordLoop;
}
break;
case "!=":
if (!(record[prop] !== value)) {
if (!(recordAny[prop] !== value)) {
continue recordLoop;
}
break;
case "<":
if (!(record[prop] < value)) {
if (!(recordAny[prop] < value)) {
continue recordLoop;
}
break;
case "<=":
if (!(record[prop] <= value)) {
if (!(recordAny[prop] <= value)) {
continue recordLoop;
}
break;
case ">":
if (!(record[prop] > value)) {
if (!(recordAny[prop] > value)) {
continue recordLoop;
}
break;
case ">=":
if (!(record[prop] >= value)) {
if (!(recordAny[prop] >= value)) {
continue recordLoop;
}
break;
case "like":
let re = new RegExp(value.replaceAll("%", ".*"));
if (!re.exec(record[prop])) {
case "=~":
if (!value.exec(recordAny[prop])) {
continue recordLoop;
}
break;
}
}
resultRecords.push(record);
resultRecords.push(recordAny);
}
}
// Now the sorting

View File

@ -4,15 +4,17 @@ import { listPages, readPage, writePage } from "plugos-silverbullet-syscall/spac
import { invokeFunction } from "plugos-silverbullet-syscall/system";
import { scanPrefixGlobal } from "plugos-silverbullet-syscall";
import { niceDate } from "../core/dates";
import { applyQuery, parseQuery } from "./engine";
import { PageMeta } from "../../common/types";
import type { Task } from "../tasks/task";
import { Item } from "../core/item";
import YAML from "yaml";
export const queryRegex =
/(<!--\s*#query\s+(?<table>\w+)\s*(filter\s+["'](?<filter>[^"']+)["'])?\s*\s*(order by\s+(?<orderBy>\w+)(?<orderDesc>\s+desc)?)?(group by\s+(?<groupBy>\w+))?\s*(limit\s+(?<limit>\d+))?\s*-->)(.+?)(<!--\s*#end\s*-->)/gs;
export const newQueryRegex =
/<!--\s*#query\s+(.+?)(?=\s*-->)-->(.+?)<!--\s*#end\s*-->/gs;
/(<!--\s*#query\s+(.+?)-->)(.+?)(<!--\s*#end\s*-->)/gs;
export function whiteOutQueries(text: string): string {
return text.replaceAll(newQueryRegex, (match) =>
return text.replaceAll(queryRegex, (match) =>
new Array(match.length + 1).join(" ")
);
}
@ -59,87 +61,86 @@ function replaceTemplateVars(s: string): string {
export async function updateMaterializedQueriesOnPage(pageName: string) {
let { text } = await readPage(pageName);
text = await replaceAsync(text, queryRegex, async (match, ...args) => {
let { table, filter, groupBy, limit, orderBy, orderDesc } =
args[args.length - 1];
const startQuery = args[0];
const endQuery = args[args.length - 4];
let results = [];
filter = filter && replaceTemplateVars(filter);
switch (table) {
case "page":
let pages = await listPages();
if (orderBy) {
pages = pages.sort((a: any, b: any) => {
if (a[orderBy] === b[orderBy]) {
return 0;
}
text = await replaceAsync(
text,
queryRegex,
async (fullMatch, startQuery, query, body, endQuery) => {
let parsedQuery = parseQuery(replaceTemplateVars(query));
if (a[orderBy] < b[orderBy]) {
return !!orderDesc ? 1 : -1;
} else {
return !!orderDesc ? -1 : 1;
}
});
}
let matchCount = 0;
for (let pageMeta of pages) {
if (!filter || (filter && pageMeta.name.includes(filter))) {
matchCount++;
results.push(`* [[${pageMeta.name}]]`);
if (limit && matchCount === +limit) {
break;
}
console.log("Parsed query", parsedQuery);
switch (parsedQuery.table) {
case "page":
let allPages = await listPages();
let markdownPages = applyQuery(parsedQuery, allPages).map(
(pageMeta: PageMeta) => `* [[${pageMeta.name}]]`
);
return `${startQuery}\n${markdownPages.join("\n")}\n${endQuery}`;
case "task":
let allTasks: Task[] = [];
for (let { key, page, value } of await scanPrefixGlobal("task:")) {
let [, pos] = key.split(":");
allTasks.push({
...value,
page: page,
pos: pos,
});
}
}
return `${startQuery}\n${results.join("\n")}\n${endQuery}`;
case "task":
for (let {
key,
page,
value: { task, complete, nested },
} of await scanPrefixGlobal("task:")) {
let [, pos] = key.split(":");
if (!filter || (filter && task.includes(filter))) {
results.push(
`* [${complete ? "x" : " "}] [[${page}@${pos}]] ${task}` +
(nested ? "\n " + nested : "")
);
}
}
return `${startQuery}\n${results.sort().join("\n")}\n${endQuery}`;
case "link":
let uniqueLinks = new Set<string>();
for (let { key, page, value: name } of await scanPrefixGlobal(
`pl:${pageName}:`
)) {
let [, pos] = key.split(":");
if (!filter || (filter && name.includes(filter))) {
let markdownTasks = applyQuery(parsedQuery, allTasks).map(
(t) =>
`* [${t.done ? "x" : " "}] [[${t.page}@${t.pos}]] ${t.name}` +
(t.nested ? "\n " + t.nested : "")
);
return `${startQuery}\n${markdownTasks.join("\n")}\n${endQuery}`;
case "link":
let uniqueLinks = new Set<string>();
for (let { value: name } of await scanPrefixGlobal(
`pl:${pageName}:`
)) {
uniqueLinks.add(name);
}
}
for (const uniqueResult of uniqueLinks) {
results.push(`* [[${uniqueResult}]]`);
}
return `${startQuery}\n${results.sort().join("\n")}\n${endQuery}`;
case "item":
for (let {
key,
page,
value: { item, nested },
} of await scanPrefixGlobal("it:")) {
let [, pos] = key.split(":");
if (!filter || (filter && item.includes(filter))) {
results.push(
`* [[${page}@${pos}]] ${item}` + (nested ? "\n " + nested : "")
);
let markdownLinks = applyQuery(
parsedQuery,
[...uniqueLinks].map((l) => ({ name: l }))
).map((pageMeta) => `* [[${pageMeta.name}]]`);
return `${startQuery}\n${markdownLinks.join("\n")}\n${endQuery}`;
case "item":
let allItems: Item[] = [];
for (let { key, page, value } of await scanPrefixGlobal("it:")) {
let [, pos] = key.split("@");
allItems.push({
...value,
page: page,
pos: +pos,
});
}
}
return `${startQuery}\n${results.sort().join("\n")}\n${endQuery}`;
default:
return match;
let markdownItems = applyQuery(parsedQuery, allItems).map(
(item) =>
`* [[${item.page}@${item.pos}]] ${item.name}` +
(item.nested ? "\n " + item.nested : "")
);
return `${startQuery}\n${markdownItems.join("\n")}\n${endQuery}`;
case "data":
let allData: Object[] = [];
for (let { key, page, value } of await scanPrefixGlobal("data:")) {
let [, pos] = key.split("@");
allData.push({
...value,
page: page,
pos: +pos,
});
}
let markdownData = applyQuery(parsedQuery, allData).map((item) =>
YAML.stringify(item)
);
return `${startQuery}\n\`\`\`data\n${markdownData.join(
"---\n"
)}\`\`\`\n${endQuery}`;
default:
return fullMatch;
}
}
});
);
// console.log("New text", text);
await writePage(pageName, text);
}

View File

@ -0,0 +1,17 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
import { LRParser } from "@lezer/lr";
export const parser = LRParser.deserialize({
version: 13,
states: "$UOVQPOOO[QQO'#C^QOQPOOOjQPO'#C`OoQQO'#CiOtQPO'#CkOOQO'#Cl'#ClOyQQO,58xO!XQPO'#CcO!pQQO'#CaOOQO'#Ca'#CaOOQO,58z,58zO#RQPO,59TOOQO,59V,59VOOQO-E6j-E6jO#WQQO,58}OjQPO,58|O#iQQO1G.oOOQO'#Cg'#CgOOQO'#Cd'#CdOOQO1G.i1G.iOOQO1G.h1G.hOOQO'#Cj'#CjOOQO7+$Z7+$Z",
stateData: "#}~OcOS~ORPO~OdROoSOsTOaQX~ORWO~Op[O~OX]O~OdROoSOsTOaQa~Oe_Oh_Oi_Oj_Ok_Ol_Om_O~On`OaTXdTXoTXsTX~ORaO~OXcOYcO[cOfbOgbO~OqfOrfOa]id]io]is]i~O",
goto: "!UaPPbPeilouPPxPe{e!ORQOTUPVRZRRYRQXRRe`Rd_Rc_RgaQVPR^V",
nodeNames: "⚠ Program Query Name WhereClause LogicalExpr AndExpr FilterExpr Value Number String Bool Regex OrderClause Order LimitClause",
maxTerm: 35,
skippedNodes: [0],
repeatNodeCount: 1,
tokenData: "3X~RsX^#`pq#`qr$Trs$`!P!Q$z!Q![%q!^!_%y!_!`&W!`!a&e!c!}&r#T#U&}#U#V(t#V#W&r#W#X)d#X#Y&r#Y#Z*v#Z#`&r#`#a,h#a#c&r#c#d.]#d#h&r#h#i0Q#i#k&r#k#l1d#l#o&r#y#z#`$f$g#`#BY#BZ#`$IS$I_#`$Ip$Iq$`$Iq$Ir$`$I|$JO#`$JT$JU#`$KV$KW#`&FU&FV#`~#eYc~X^#`pq#`#y#z#`$f$g#`#BY#BZ#`$IS$I_#`$I|$JO#`$JT$JU#`$KV$KW#`&FU&FV#`~$WP!_!`$Z~$`Oj~~$cUOr$`rs$us$Ip$`$Ip$Iq$u$Iq$Ir$u$Ir~$`~$zOY~~%PV[~OY$zZ]$z^!P$z!P!Q%f!Q#O$z#O#P%k#P~$z~%kO[~~%nPO~$z~%vPX~!Q![%q~&OPe~!_!`&R~&WOh~~&]Pi~#r#s&`~&eOm~~&jPl~!_!`&m~&rOk~P&wQRP!c!}&r#T#o&rR'SURP!c!}&r#T#b&r#b#c'f#c#g&r#g#h(U#h#o&rR'kSRP!c!}&r#T#W&r#W#X'w#X#o&rR(OQnQRP!c!}&r#T#o&rR(ZSRP!c!}&r#T#V&r#V#W(g#W#o&rR(nQrQRP!c!}&r#T#o&rR(ySRP!c!}&r#T#m&r#m#n)V#n#o&rR)^QpQRP!c!}&r#T#o&rR)iSRP!c!}&r#T#X&r#X#Y)u#Y#o&rR)zSRP!c!}&r#T#g&r#g#h*W#h#o&rR*]SRP!c!}&r#T#V&r#V#W*i#W#o&rR*pQqQRP!c!}&r#T#o&rR*{RRP!c!}&r#T#U+U#U#o&rR+ZSRP!c!}&r#T#`&r#`#a+g#a#o&rR+lSRP!c!}&r#T#g&r#g#h+x#h#o&rR+}SRP!c!}&r#T#X&r#X#Y,Z#Y#o&rR,bQgQRP!c!}&r#T#o&rR,mSRP!c!}&r#T#]&r#]#^,y#^#o&rR-OSRP!c!}&r#T#a&r#a#b-[#b#o&rR-aSRP!c!}&r#T#]&r#]#^-m#^#o&rR-rSRP!c!}&r#T#h&r#h#i.O#i#o&rR.VQsQRP!c!}&r#T#o&rR.bSRP!c!}&r#T#f&r#f#g.n#g#o&rR.sSRP!c!}&r#T#W&r#W#X/P#X#o&rR/USRP!c!}&r#T#X&r#X#Y/b#Y#o&rR/gSRP!c!}&r#T#f&r#f#g/s#g#o&rR/zQoQRP!c!}&r#T#o&rR0VSRP!c!}&r#T#f&r#f#g0c#g#o&rR0hSRP!c!}&r#T#i&r#i#j0t#j#o&rR0ySRP!c!}&r#T#X&r#X#Y1V#Y#o&rR1^QfQRP!c!}&r#T#o&rR1iSRP!c!}&r#T#[&r#[#]1u#]#o&rR1zSRP!c!}&r#T#X&r#X#Y2W#Y#o&rR2]SRP!c!}&r#T#f&r#f#g2i#g#o&rR2nSRP!c!}&r#T#X&r#X#Y2z#Y#o&rR3RQdQRP!c!}&r#T#o&r",
tokenizers: [0, 1],
topRules: {"Program":[0,1]},
tokenPrec: 0
})

View File

@ -0,0 +1,17 @@
// This file was generated by lezer-generator. You probably shouldn't edit it.
export const
Program = 1,
Query = 2,
Name = 3,
WhereClause = 4,
LogicalExpr = 5,
AndExpr = 6,
FilterExpr = 7,
Value = 8,
Number = 9,
String = 10,
Bool = 11,
Regex = 12,
OrderClause = 13,
Order = 14,
LimitClause = 15

View File

@ -14,7 +14,7 @@ Order {
"desc" | "asc"
}
Value { Number | String | Bool }
Value { Number | String | Bool | Regex }
LogicalExpr { AndExpr | FilterExpr }
@ -27,7 +27,7 @@ FilterExpr {
| Name "!=" Value
| Name ">=" Value
| Name ">" Value
| Name "like" Value
| Name "=~" Value
}
@skip { space }
@ -39,6 +39,10 @@ Bool {
@tokens {
space { std.whitespace+ }
Name { std.asciiLetter+ }
String { "\"" ![\"]* "\"" }
String {
("\"" | "“" | "”") ![\"”“]* ("\"" | "“" | "”")
}
Regex { "/" ( ![/\\\n\r] | "\\" _ )* "/"? }
Number { std.digit+ }
}

View File

@ -5,3 +5,8 @@ functions:
path: ./materialized_queries.ts:updateMaterializedQueriesCommand
command:
name: "Materialized Queries: Update"
key: "Alt-q"
indexData:
path: ./data.ts:indexData
events:
- page:index

View File

@ -5,7 +5,7 @@ import { whiteOutQueries } from "../query/materialized_queries";
import { batchSet } from "plugos-silverbullet-syscall/index";
import { readPage, writePage } from "plugos-silverbullet-syscall/space";
import { parseMarkdown } from "plugos-silverbullet-syscall/markdown";
import { dispatch, getText } from "plugos-silverbullet-syscall/editor";
import { dispatch, getCurrentPage, getText } from "plugos-silverbullet-syscall/editor";
import {
addParentPointers,
collectNodesMatching,
@ -14,12 +14,14 @@ import {
renderToText
} from "../../common/tree";
type Task = {
task: string;
complete: boolean;
export type Task = {
name: string;
done: boolean;
deadline?: string;
pos?: number;
nested?: string;
// Not saved in DB, just added when pulled out (from key)
pos?: number;
page?: string;
};
export async function indexTasks({ name, text }: IndexEvent) {
@ -32,8 +34,8 @@ export async function indexTasks({ name, text }: IndexEvent) {
let task = n.children!.slice(1).map(renderToText).join("").trim();
let complete = n.children![0].children![0].text! !== "[ ]";
let value: Task = {
task,
complete,
name: task,
done: complete,
};
let deadlineNodes = collectNodesOfType(n, "DeadlineDate");
@ -62,6 +64,7 @@ export async function taskToggle(event: ClickEvent) {
}
export async function taskToggleAtPos(pos: number) {
let currentpage = await getCurrentPage();
let text = await getText();
let mdTree = await parseMarkdown(text);
addParentPointers(mdTree);
@ -91,8 +94,9 @@ export async function taskToggleAtPos(pos: number) {
let ref = wikiLink.children![0].text!;
if (ref.includes("@")) {
let [page, pos] = ref.split("@");
let pageData = await readPage(page);
let text = pageData.text;
if (page !== currentpage) {
text = (await readPage(page)).text;
}
let referenceMdTree = await parseMarkdown(text);
// Adding +1 to immediately hit the task marker

View File

@ -32,7 +32,7 @@ export function mdExtensionSyntaxConfig({
}
return cx.addElement(cx.elt(nodeType, pos, pos + match[0].length));
},
after: "Emphasis",
// after: "Emphasis",
},
],
};

View File

@ -78,6 +78,7 @@ export default function buildMarkdown(mdExtensions: MDExt[]): Language {
codeParser: getCodeParser([
LanguageDescription.of({
name: "yaml",
alias: ["meta", "data"],
support: new LanguageSupport(StreamLanguage.define(yaml)),
}),
LanguageDescription.of({

View File

@ -63,7 +63,7 @@
}
.line-fenced-code {
background-color: #efefef;
background-color: rgba(72, 72, 72, 0.1);
}
.meta {
@ -99,6 +99,10 @@
text-decoration: underline;
}
.atom {
color: darkred;
}
.wiki-link-page {
color: #0330cb;
background-color: rgba(77,141,255,0.07);
@ -111,13 +115,7 @@
//background-color: rgba(77,141,255,0.07);
}
.mention {
color: #0330cb;
}
.tag {
color: #8d8d8d;
}
.code {
background-color: #efefef;