From 2ad56f14c37272dee41be52232fa1db255d10517 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 16 Apr 2026 18:44:26 +0200 Subject: [PATCH 1/6] adds exclude filters to all file system crawl and find functions --- src/org/rascalmpl/library/util/FileSystem.rsc | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/org/rascalmpl/library/util/FileSystem.rsc b/src/org/rascalmpl/library/util/FileSystem.rsc index 4c124b764ce..bd05c81e1e7 100644 --- a/src/org/rascalmpl/library/util/FileSystem.rsc +++ b/src/org/rascalmpl/library/util/FileSystem.rsc @@ -9,27 +9,43 @@ module util::FileSystem import IO; +@synopsis{Model of a file system with its (nested) files and directories} data FileSystem = directory(loc l, set[FileSystem] children) | file(loc l) ; -FileSystem crawl(loc l) = isDirectory(l) ? directory(l, {crawl(e) | e <- l.ls}) : file(l); +@synopsis{Extract a compositional ((FileSystem)) model starting from a given directory location.} +@description{ +* Using `exclude` you can avoid going into certain directories or filter specific files from the result. +} +FileSystem crawl(loc l, set[loc] exclude= {}) + = isDirectory(l) ? directory(l, {crawl(e, exclude=exclude) | e <- l.ls, l notin exclude}) : file(l); -@synopsis{Recursively lists locations of all files from the supplied directory. - If input is a file, its location is returned instead.} -set[loc] files(loc l) = isDirectory(l) ? { *files(e) | e <- l.ls } : {l}; +@synopsis{Recursively lists locations of all files from the supplied directory.} +@description{ +* If input `l` is a file, its location is returned instead. +* Using `exclude` you can avoid going into certain directories or filter specific files from the result. +} +set[loc] files(loc l, set[loc] exclude={}) = isDirectory(l) ? { *files(e, exclude=exclude) | e <- l.ls, e notin exclude} : {l}; -@synopsis{Recursively lists locations of all files that satisfy the filter criterion `filt`. 
- For a file to be included, `filt` must return `true` for it.} -set[loc] find(loc f, bool (loc) filt) +@synopsis{Recursively lists locations of all files that satisfy the filter criterion `filt`.} +@description{ +* For a file to be included, `filt` must return `true` for it. All directories are traversed though, regardless of `filt`. +* Using `exclude` you can avoid going into certain directories or filter specific files from the result. +} +set[loc] find(loc f, bool (loc) filt, set[loc] exclude = {}) = isDirectory(f) - ? {*find(c, filt) | c <- f.ls} + (filt(f) ? {f} : { }) + ? {*find(c, filt, exclude=exclude) | c <- f.ls, c notin exclude} + ((filt(f) && f notin exclude) ? {f} : { }) : (filt(f) ? {f} : { }) ; @synopsis{Recursively lists locations of all files that end in `ext`.} -set[loc] find(loc f, str ext) = find(f, bool (loc l) { return l.extension == ext; }); +@description{ +* For a file to be included, it's extension must equal `ext`. All directories are traversed though, regardless of their extension. +* Using `exclude` you can avoid going into certain directories or filter specific files from the result. +} +set[loc] find(loc f, str ext, set[loc] exclude={}) = find(f, bool (loc l) { return l.extension == ext; }, exclude=exclude); @synopsis{Lists all files recursively ignored files and directories starting with a dot.} set[loc] visibleFiles(loc l) { From 499eaa7cf4e438d33a3a9fa6a3b2da65280963c9 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Fri, 17 Apr 2026 09:16:23 +0200 Subject: [PATCH 2/6] improved reporting on missing modules during parser generation by adding warnings. 
--- .../rascal/grammar/definition/Modules.rsc | 22 +++++++++++++------ .../grammar/storage/ModuleParserStorage.rsc | 17 ++++++++++++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc index 240490bb805..fc56cdab0f0 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc @@ -18,6 +18,8 @@ import Set; import IO; import util::Monitor; +syntax HereLocation = (); + @memo @synopsis{Converts internal module representation of Rascal interpreter to single grammar definition} public Grammar modules2grammar(str main, map[str name, tuple[set[str] imports, set[str] extends, set[SyntaxDefinition] defs] \mod] mods) { @@ -51,17 +53,22 @@ public Grammar fuse(GrammarDefinition def) { done = {}; deps = dependencies(def); + if (!def.modules[def.main]?) { + jobWarning("the main module is unavailable (ignored)", (HereLocation)``@\loc); + } + while (todo != {}) { = takeOneFrom(todo); done += nm; - if(def.modules[nm]?){ - \mod = def.modules[nm]; - result = (compose(result, \mod.grammar) | compose(it, def.modules[i].grammar) | i <- deps[nm], def.modules[i]?); - todo += (\mod.extends - done); - } - else { - warning("Fuse algorithm misses module definition for dependency ", |unknown:///|); + + \mod = def.modules[nm]; + + for (str i <- deps[nm], !def.modules[i]?) 
{ + jobWarning(" imports or extends the unavailable module (ignored)", (HereLocation)``@\loc); } + + result = (compose(result, \mod.grammar) | compose(it, def.modules[i].grammar) | i <- deps[nm], def.modules[i]?); + todo += (\mod.extends - done); } return result; @@ -69,6 +76,7 @@ public Grammar fuse(GrammarDefinition def) { + public GrammarModule module2grammar(Module \mod) { = getModuleMetaInf(\mod); return \module(nm, imps, exts, syntax2grammar(collect(\mod))); diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc index 88b36c12a3a..5f6b1cac8fe 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc @@ -102,13 +102,26 @@ int main(list[str] args) { storeParsersForModules(pcfg); } ``` + +Or, you could use the ((PathConfig)) parameter feature of `main` functions. 
The Rascal maven plugin will +make sure to pass the proper ((PathConfig)) parameter to your main function: +```rascal +module YourMainModule + +import util::Reflective; +import lang::rascal::grammar::storage::ModuleParserStorage; + +int main(PathConfig pcfg = pathConfig()) { + storeParsersForModules(pcfg); +} +``` } void storeParsersForModules(PathConfig pcfg) { - storeParsersForModules({*find(src, "rsc") | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); + storeParsersForModules({*find(src, "rsc", exclude={*pcfg.ignores}) | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); } void storeParsersForModules(set[loc] moduleFiles, PathConfig pcfg) { - storeParsersForModules({parseModule(m) | m <- moduleFiles, bprintln("Loading ")}, pcfg); + storeParsersForModules({parseModule(m) | m <- moduleFiles, m notin pcfg.ignores, bprintln("Loading ")}, pcfg); } void storeParsersForModules(set[Module] modules, PathConfig pcfg) { From 9f0e20dda91708139bd3da355f919f98eb13c587 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Fri, 17 Apr 2026 09:25:53 +0200 Subject: [PATCH 3/6] removed bootstrap hazard in parser generation module --- .../library/lang/rascal/grammar/definition/Modules.rsc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc index fc56cdab0f0..ae0ace03a04 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/definition/Modules.rsc @@ -18,7 +18,7 @@ import Set; import IO; import util::Monitor; -syntax HereLocation = (); +private loc here = |std:///lang/rascal/syntax/grammar/definition/Modules|; @memo @synopsis{Converts internal module representation of Rascal interpreter to single grammar definition} @@ -54,7 +54,7 @@ public Grammar fuse(GrammarDefinition def) { deps = dependencies(def); if (!def.modules[def.main]?) 
{ - jobWarning("the main module is unavailable (ignored)", (HereLocation)``@\loc); + jobWarning("the main module is unavailable (ignored)", here); } while (todo != {}) { @@ -64,7 +64,7 @@ public Grammar fuse(GrammarDefinition def) { \mod = def.modules[nm]; for (str i <- deps[nm], !def.modules[i]?) { - jobWarning(" imports or extends the unavailable module (ignored)", (HereLocation)``@\loc); + jobWarning(" imports or extends the unavailable module (ignored)", here); } result = (compose(result, \mod.grammar) | compose(it, def.modules[i].grammar) | i <- deps[nm], def.modules[i]?); From 1d2d09e57620272e2494e9a4912977e2b2b986d4 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Mon, 20 Apr 2026 11:36:46 +0200 Subject: [PATCH 4/6] added optionally (by default) fail fast versions of find, crawl, etc. such that the directory provided at the top-level of the recursion must at least exist, and lower levels do not check existence because .ls already guarantees existence modulo races on disk --- src/org/rascalmpl/library/IO.rsc | 2 +- src/org/rascalmpl/library/util/FileSystem.rsc | 61 ++++++++++++++++--- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/org/rascalmpl/library/IO.rsc b/src/org/rascalmpl/library/IO.rsc index 5e3255b4f67..e696f4e5a6c 100644 --- a/src/org/rascalmpl/library/IO.rsc +++ b/src/org/rascalmpl/library/IO.rsc @@ -18,6 +18,7 @@ The following input/output functions are defined: module IO import Exception; + extend analysis::diff::edits::FileSystemChanges; @synopsis{All functions in this module that have a charset parameter use this as default.} @@ -209,7 +210,6 @@ exists(|std:///IO.rsc|); @javaClass{org.rascalmpl.library.Prelude} public java bool exists(loc file); - @synopsis{Find a named file in a list of locations.} @examples{ ```rascal-shell diff --git a/src/org/rascalmpl/library/util/FileSystem.rsc b/src/org/rascalmpl/library/util/FileSystem.rsc index bd05c81e1e7..172688a23eb 100644 --- 
a/src/org/rascalmpl/library/util/FileSystem.rsc +++ b/src/org/rascalmpl/library/util/FileSystem.rsc @@ -7,7 +7,9 @@ } module util::FileSystem +import Exception; import IO; +import util::Monitor; @synopsis{Model of a file system with its (nested) files and directories} data FileSystem @@ -18,40 +20,81 @@ data FileSystem @synopsis{Extract a compositional ((FileSystem)) model starting from a given directory location.} @description{ * Using `exclude` you can avoid going into certain directories or filter specific files from the result. +* With `checkExist=true` the `l` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -FileSystem crawl(loc l, set[loc] exclude= {}) - = isDirectory(l) ? directory(l, {crawl(e, exclude=exclude) | e <- l.ls, l notin exclude}) : file(l); +FileSystem crawl(loc l, set[loc] exclude= {}, bool checkExist=true) throws PathNotFound + = isDirectory(l) ? directory(l, {crawl(e, exclude=exclude, checkExist=false) | e <- l.ls, l notin exclude}) : file(l) + when checkExist ==> throwNotExist(l) + ; @synopsis{Recursively lists locations of all files from the supplied directory.} @description{ * If input `l` is a file, its location is returned instead. * Using `exclude` you can avoid going into certain directories or filter specific files from the result. +* With `checkExist=true` the `l` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -set[loc] files(loc l, set[loc] exclude={}) = isDirectory(l) ? { *files(e, exclude=exclude) | e <- l.ls, e notin exclude} : {l}; +set[loc] files(loc l, set[loc] exclude={}, bool checkExist=true) throws PathNotFound + = isDirectory(l) ? 
{ *files(e, exclude=exclude, checkExist=false) | e <- l.ls, e notin exclude} : {l} + when checkExist ==> throwNotExist(l); @synopsis{Recursively lists locations of all files that satisfy the filter criterion `filt`.} @description{ * For a file to be included, `filt` must return `true` for it. All directories are traversed though, regardless of `filt`. * Using `exclude` you can avoid going into certain directories or filter specific files from the result. +* With `checkExist=true` the `f` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -set[loc] find(loc f, bool (loc) filt, set[loc] exclude = {}) +set[loc] find(loc f, bool (loc) filt, set[loc] exclude = {}, bool checkExist=true) throws PathNotFound = isDirectory(f) - ? {*find(c, filt, exclude=exclude) | c <- f.ls, c notin exclude} + ((filt(f) && f notin exclude) ? {f} : { }) + ? {*find(c, filt, exclude=exclude, checkExist=false) | c <- f.ls, c notin exclude} + ((filt(f) && f notin exclude) ? {f} : { }) : (filt(f) ? {f} : { }) + when checkExist ==> throwNotExist(f) ; @synopsis{Recursively lists locations of all files that end in `ext`.} @description{ * For a file to be included, it's extension must equal `ext`. All directories are traversed though, regardless of their extension. * Using `exclude` you can avoid going into certain directories or filter specific files from the result. +* With `checkExist=true` the `f` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. 
} -set[loc] find(loc f, str ext, set[loc] exclude={}) = find(f, bool (loc l) { return l.extension == ext; }, exclude=exclude); +set[loc] find(loc f, str ext, set[loc] exclude={}, bool checkExist=true) throws PathNotFound + = find(f, bool (loc l) { return l.extension == ext; }, exclude=exclude, checkExist=checkExist); @synopsis{Lists all files recursively ignored files and directories starting with a dot.} -set[loc] visibleFiles(loc l) { - if (/^\./ := l.file) +set[loc] visibleFiles(loc l, bool checkExist=true) throws PathNotFound { + if (checkExist) { + throwNotExist(l); + } + if (/^\./ := l.file) { return {}; - if (isDirectory(l)) + } + if (isDirectory(l)) { return {*visibleFiles(f) | f <- l.ls}; + } return {l}; } + +@synopsis{Always returns true, but shows a ((util::Monitor::jobWarning)) if `file` does not exist.} +@benefits{ +* This can be used practically in comprehensions that process file locations. +* Use it to fail more transparently, but still continue, in case of erroneous file configuration (paths) +} +bool warnNotExist(loc file) { + if (!exists(file)) { + jobWarning(" does not exist.", |std:///util/FileSystem|); + } + return true; +} + +@synopsis{Always returns true, except when throwing PathNotFound if `file` does not exist} +@benefits{ +* This can be used practically in comprehensions that process file locations; +* Use it to fail faster and harder in case of erroneous file configuration (paths) +} +bool throwNotExist(loc file) throws PathNotFound { + if (!exists(file)) { + throw PathNotFound(file); + } + return true; +} + + From 39a532ca3710a113d849fc3820b8dbb123f65508 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 20 Apr 2026 12:36:49 +0200 Subject: [PATCH 5/6] checkExist is now false by default for backward compatibility --- src/org/rascalmpl/library/util/FileSystem.rsc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/org/rascalmpl/library/util/FileSystem.rsc b/src/org/rascalmpl/library/util/FileSystem.rsc index 172688a23eb..18ad29c5a3f 100644 --- a/src/org/rascalmpl/library/util/FileSystem.rsc +++ b/src/org/rascalmpl/library/util/FileSystem.rsc @@ -22,7 +22,7 @@ data FileSystem * Using `exclude` you can avoid going into certain directories or filter specific files from the result. * With `checkExist=true` the `l` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -FileSystem crawl(loc l, set[loc] exclude= {}, bool checkExist=true) throws PathNotFound +FileSystem crawl(loc l, set[loc] exclude= {}, bool checkExist=false) throws PathNotFound = isDirectory(l) ? directory(l, {crawl(e, exclude=exclude, checkExist=false) | e <- l.ls, l notin exclude}) : file(l) when checkExist ==> throwNotExist(l) ; @@ -33,7 +33,7 @@ FileSystem crawl(loc l, set[loc] exclude= {}, bool checkExist=true) throws PathN * Using `exclude` you can avoid going into certain directories or filter specific files from the result. * With `checkExist=true` the `l` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -set[loc] files(loc l, set[loc] exclude={}, bool checkExist=true) throws PathNotFound +set[loc] files(loc l, set[loc] exclude={}, bool checkExist=false) throws PathNotFound = isDirectory(l) ? { *files(e, exclude=exclude, checkExist=false) | e <- l.ls, e notin exclude} : {l} when checkExist ==> throwNotExist(l); @@ -43,7 +43,7 @@ set[loc] files(loc l, set[loc] exclude={}, bool checkExist=true) throws PathNotF * Using `exclude` you can avoid going into certain directories or filter specific files from the result. 
* With `checkExist=true` the `f` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -set[loc] find(loc f, bool (loc) filt, set[loc] exclude = {}, bool checkExist=true) throws PathNotFound +set[loc] find(loc f, bool (loc) filt, set[loc] exclude = {}, bool checkExist=false) throws PathNotFound = isDirectory(f) ? {*find(c, filt, exclude=exclude, checkExist=false) | c <- f.ls, c notin exclude} + ((filt(f) && f notin exclude) ? {f} : { }) : (filt(f) ? {f} : { }) @@ -56,11 +56,11 @@ set[loc] find(loc f, bool (loc) filt, set[loc] exclude = {}, bool checkExist=tru * Using `exclude` you can avoid going into certain directories or filter specific files from the result. * With `checkExist=true` the `f` parameter is checked to exist before the file system is crawled and a PathNotFound exception is thrown if not. } -set[loc] find(loc f, str ext, set[loc] exclude={}, bool checkExist=true) throws PathNotFound +set[loc] find(loc f, str ext, set[loc] exclude={}, bool checkExist=false) throws PathNotFound = find(f, bool (loc l) { return l.extension == ext; }, exclude=exclude, checkExist=checkExist); @synopsis{Lists all files recursively ignored files and directories starting with a dot.} -set[loc] visibleFiles(loc l, bool checkExist=true) throws PathNotFound { +set[loc] visibleFiles(loc l, bool checkExist=false) throws PathNotFound { if (checkExist) { throwNotExist(l); } @@ -68,7 +68,7 @@ set[loc] visibleFiles(loc l, bool checkExist=true) throws PathNotFound { return {}; } if (isDirectory(l)) { - return {*visibleFiles(f) | f <- l.ls}; + return {*visibleFiles(f, checkExist=false) | f <- l.ls}; } return {l}; } From 8ae5d13c5d98de634b5c36e8bc2418f490360f02 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 20 Apr 2026 12:38:31 +0200 Subject: [PATCH 6/6] ModuleParserStorage fails fast on non-existent source folders --- .../library/lang/rascal/grammar/storage/ModuleParserStorage.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc index 5f6b1cac8fe..10bcaeae2f4 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc @@ -117,7 +117,7 @@ int main(PathConfig pcfg = pathConfig()) { ``` } void storeParsersForModules(PathConfig pcfg) { - storeParsersForModules({*find(src, "rsc", exclude={*pcfg.ignores}) | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); + storeParsersForModules({*find(src, "rsc", exclude={*pcfg.ignores}, checkExist=true) | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); } void storeParsersForModules(set[loc] moduleFiles, PathConfig pcfg) {