From 821dddff5ec472d144808dee529673851454bf1b Mon Sep 17 00:00:00 2001
From: Zef Hemel
Date: Thu, 9 Jan 2025 09:00:29 +0100
Subject: [PATCH] Lua string.gsub fixes

---
Review notes (text in this region is ignored when the patch is applied):

* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Indentation of context and removed lines is a best
  guess; use `git apply --ignore-whitespace` if a hunk is rejected.
* The blob ids on the `index` lines and the commit id above describe the
  patch as first sent and do not match the revised content below.
* gsub now scans left to right without overlapping matches. Previously the
  scan restarted one character after each match start, so
  string.gsub("aaaa", "aa", "b") recorded three overlapping matches and
  returned a corrupted string and count.
* Replacement functions are now called in match order, not reverse order.
* `%` followed by a regex metacharacter (e.g. `%(`) is escaped instead of
  being passed through to the JavaScript regex unescaped.
* String replacements expand %0-%9 and %%; a function returning false keeps
  the original match, like nil.

 common/space_lua/language_test.lua | 73 ++++++++++++++++++++++++
 common/space_lua/stdlib/string.ts  | 85 +++++++++++++++++++++++++-----
 2 files changed, 146 insertions(+), 12 deletions(-)

diff --git a/common/space_lua/language_test.lua b/common/space_lua/language_test.lua
index 258405a7..7a02292b 100644
--- a/common/space_lua/language_test.lua
+++ b/common/space_lua/language_test.lua
@@ -276,6 +276,79 @@ assert(string.sub("Hello", 2, 4) == "ell")
 assert(string.upper("Hello") == "HELLO")
 assert(string.lower("Hello") == "hello")
 
+-- Test string.gsub with various replacement types
+-- Simple string replacement
+local result, count = string.gsub("hello world", "hello", "hi")
+assert(result == "hi world", "Basic string replacement failed")
+assert(count == 1, "Basic replacement count failed")
+
+-- Multiple replacements
+result, count = string.gsub("hello hello hello", "hello", "hi")
+assert(result == "hi hi hi", "Multiple replacements failed")
+assert(count == 3, "Multiple replacement count failed")
+
+-- Limited replacements with n parameter
+result, count = string.gsub("hello hello hello", "hello", "hi", 2)
+assert(result == "hi hi hello", "Limited replacements failed")
+assert(count == 2, "Limited replacement count failed")
+
+-- Function replacement without captures
+result = string.gsub("hello world", "hello", function(match)
+    assert(match == "hello", "Function received incorrect match")
+    return string.upper(match)
+end)
+assert(result == "HELLO world", "Function replacement without captures failed")
+
+-- Function replacement with single capture
+result = string.gsub("hello world", "(h)ello", function(h)
+    assert(h == "h", "Function received incorrect capture")
+    return string.upper(h) .. "i"
+end)
+assert(result == "Hi world", "Function replacement with single capture failed")
+
+-- Function replacement with multiple captures
+result = string.gsub("hello world", "(h)(e)(l)(l)o", function(h, e, l1, l2)
+    print("Captures:", h, e, l1, l2) -- Debug what captures we're getting
+    assert(h == "h" and e == "e" and l1 == "l" and l2 == "l",
+        "Function received incorrect captures: " .. h .. ", " .. e .. ", " .. l1 .. ", " .. l2)
+    return string.upper(h) .. string.upper(e) .. l1 .. l2 .. "o"
+end)
+print("Result:", result) -- Debug the actual result
+assert(result == "HEllo world", "Function replacement with multiple captures failed")
+
+-- Function returning nil (should keep original match)
+result = string.gsub("hello world", "hello", function() return nil end)
+assert(result == "hello world", "Function returning nil failed")
+
+-- Pattern with multiple matches on same position
+result, count = string.gsub("hello world", "h?e", "X")
+assert(result == "Xllo world", "Overlapping matches failed")
+assert(count == 1, "Overlapping match count failed")
+
+-- Matches must not overlap
+result, count = string.gsub("aaaa", "aa", "b")
+assert(result == "bb", "Non-overlapping replacement failed")
+assert(count == 2, "Non-overlapping replacement count failed")
+
+-- Empty captures
+result = string.gsub("hello", "(h()e)", function(full, empty)
+    assert(full == "he" and empty == "", "Empty capture handling failed")
+    return "XX"
+end)
+assert(result == "XXllo", "Empty capture replacement failed")
+
+-- Patterns with magic characters
+result = string.gsub("hello.world", "%.", "-")
+assert(result == "hello-world", "Magic character replacement failed")
+
+-- Escaped pattern metacharacters
+result = string.gsub("f(x)", "%(", "[")
+assert(result == "f[x)", "Escaped metacharacter replacement failed")
+
+-- Capture references in the replacement string
+result = string.gsub("hello world", "(%w+) (%w+)", "%2 %1")
+assert(result == "world hello", "Capture reference replacement failed")
+
 -- table functions
 local t = { 1, 2, 3 }
 table.insert(t, 4)
diff --git a/common/space_lua/stdlib/string.ts b/common/space_lua/stdlib/string.ts
index 85c96fbc..bfe9b953 100644
--- a/common/space_lua/stdlib/string.ts
+++ b/common/space_lua/stdlib/string.ts
@@ -1,5 +1,7 @@
 import {
   LuaBuiltinFunction,
+  luaCall,
+  LuaFunction,
   LuaMultiRes,
   LuaTable,
   luaToString,
@@ -43,19 +45,78 @@ export const stringApi = new LuaTable({
     };
   }),
   gsub: new LuaBuiltinFunction(
-    (_sf, s: string, pattern: string, repl: string, n?: number) => {
+    async (
+      sf,
+      s: string,
+      pattern: string,
+      repl: any, // string or LuaFunction
+      n?: number,
+    ) => {
       n = n ?? Infinity;
-      const regex = new RegExp(pattern, "g");
-      let result = s;
-      let match: RegExpExecArray | null;
-      for (let i = 0; i < n; i++) {
-        match = regex.exec(result);
-        if (!match) {
-          break;
-        }
-        result = result.replace(match[0], repl);
-      }
-      return result;
+
+      // Convert the Lua pattern to a JavaScript regex (partial support):
+      // - %d, %s, %w: digit, whitespace and word character classes
+      // - % followed by any other character: that character, literally
+      const jsPattern = pattern.replace(/%(.)/g, (_, char: string) => {
+        switch (char) {
+          case "d":
+            return "\\d"; // Match digit
+          case "s":
+            return "\\s"; // Match whitespace
+          case "w":
+            return "\\w"; // Match word character
+          default:
+            // Escape regex metacharacters so that e.g. %( stays literal
+            return /[.*+?^${}()|[\]\\-]/.test(char) ? "\\" + char : char;
+        }
+      });
+
+      const regex = new RegExp(jsPattern, "g");
+      let result = "";
+      let count = 0;
+      let lastEnd = 0; // End of the previous match within s
+      let match: RegExpExecArray | null;
+
+      // Scan left to right: matches never overlap and replacement
+      // functions are called in match order
+      while (count < n && (match = regex.exec(s)) !== null) {
+        const fullMatch = match[0];
+        const captures = match.slice(1);
+
+        let replacement: any;
+        if (repl.call) {
+          const args = captures.length > 0 ? captures : [fullMatch];
+          replacement = await luaCall(repl, args, sf.astCtx!, sf);
+          // A nil or false result keeps the original match
+          if (replacement == null || replacement === false) {
+            replacement = fullMatch;
+          }
+        } else {
+          // %0 is the whole match, %1-%9 are captures, %% is a literal %
+          replacement = String(repl).replace(
+            /%([0-9%])/g,
+            (_, ref: string) => {
+              if (ref === "%") return "%";
+              if (ref === "0") return fullMatch;
+              // Without captures, %1 refers to the whole match
+              if (ref === "1" && captures.length === 0) return fullMatch;
+              return captures[Number(ref) - 1] ?? "";
+            },
+          );
+        }
+
+        result += s.slice(lastEnd, match.index) + replacement;
+        lastEnd = match.index + fullMatch.length;
+        count++;
+
+        // An empty match does not advance the regex, so step manually
+        if (fullMatch.length === 0) {
+          regex.lastIndex++;
+        }
+      }
+      result += s.slice(lastEnd);
+
+      return new LuaMultiRes([result, count]);
     },
   ),
   len: new LuaBuiltinFunction((_sf, s: string) => {