Skip to content

Commit 76c4097

Browse files
davidsmfreire and claude committed
feat: Go and JavaScript/TypeScript SQL extractors
Adds tree-sitter-backed extractors for `.go`, `.js`, `.ts`, and `.tsx` files, mirroring the existing `.py` / `.rs` finders. - `finder/go.rs`: interpreted strings (`"…"`) and raw strings (`` `…` ``). `fmt`-style verbs (`%s`, `%d`, …) get swapped for `1` so format arguments don't break SQL parsing; `%%` stays as a literal `%`. - `finder/javascript.rs`: single/double-quoted `string` literals and backtick `template_string` literals. Template substitutions (`${...}`) become `1`. Used for `.js`, `.ts`, and `.tsx`; the TypeScript grammar exposes the same string-literal node kinds, so only the language pointer differs per extension. - `finder/mod.rs`: `SUPPORTED_CODE_FILE_EXTENSIONS` extended to `["py","rs","go","ts","tsx","js"]`; dispatch wired. - LSP and VS Code extension cover the new extensions for diagnostics and activation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ce6afd3 commit 76c4097

11 files changed

Lines changed: 506 additions & 9 deletions

File tree

Cargo.lock

Lines changed: 35 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ROADMAP.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,13 @@
1818
- Output formats: text + JSON; split exit codes; `--stdin` mode.
1919
- `.sqlshield.toml` configuration with CLI override layering.
2020
- Parallel file walker (rayon) with default ignore list.
21+
- Language extractors: `.py`, `.rs`, `.go`, `.js`, `.ts`, `.tsx` —
22+
string literals, raw / template strings, and language-specific
23+
placeholder forms (Python f-strings / `.format()`, Go `fmt` verbs,
24+
JS template substitutions).
2125
- Language Server (`sqlshield-lsp`) for inline editor diagnostics
22-
in `.py` / `.rs` / `.sql`; auto-reload on schema-file changes.
26+
across every supported source extension; auto-reload on schema-file
27+
changes.
2328
- First-party VS Code extension (`editors/vscode`) wrapping
2429
`sqlshield-lsp` over stdio.
2530
- Live database introspection (`sqlshield-introspect`, exposed via
@@ -32,7 +37,7 @@
3237
- **MySQL live introspection** — pending: `mysql_common` uses unstable
3338
Rust features that haven't reached the project's pinned toolchain.
3439
A toolchain bump or a different sync driver would unblock this.
35-
- **More language extractors** — Go, TypeScript, Java string literals.
40+
- **More language extractors** — Java, C#, PHP, Ruby string literals.
3641
Each is a small `finder/<lang>.rs` module + tree-sitter grammar.
3742

3843
## Not planned

editors/vscode/package.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@
2828
"onLanguage:sql",
2929
"onLanguage:python",
3030
"onLanguage:rust",
31+
"onLanguage:go",
32+
"onLanguage:javascript",
33+
"onLanguage:javascriptreact",
34+
"onLanguage:typescript",
35+
"onLanguage:typescriptreact",
3136
"workspaceContains:**/.sqlshield.toml",
3237
"workspaceContains:**/schema.sql"
3338
],

editors/vscode/src/extension.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ async function start(_context: ExtensionContext): Promise<void> {
4545
{ scheme: "file", language: "sql" },
4646
{ scheme: "file", language: "python" },
4747
{ scheme: "file", language: "rust" },
48+
{ scheme: "file", language: "go" },
49+
{ scheme: "file", language: "javascript" },
50+
{ scheme: "file", language: "javascriptreact" },
51+
{ scheme: "file", language: "typescript" },
52+
{ scheme: "file", language: "typescriptreact" },
4853
],
4954
synchronize: {
5055
fileEvents: workspace.createFileSystemWatcher("**/.sqlshield.toml"),

sqlshield-lsp/src/server.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ fn compute_diagnostics(text: &str, file_ext: &str, state: &LoadedState) -> Vec<D
256256
}
257257
}
258258
}
259-
"py" | "rs" => {
259+
"py" | "rs" | "go" | "ts" | "tsx" | "js" => {
260260
let dialect = state.dialect.as_sqlparser();
261261
match sqlshield::finder::find_queries_in_code_with_dialect(
262262
text.as_bytes(),

sqlshield/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@ regex = "1.10.3"
1717
sqlparser = { version = "0.43.1", features = ["visitor"] }
1818
thiserror = "1.0"
1919
tree-sitter = "0.20.10"
20+
tree-sitter-go = "0.20.0"
21+
tree-sitter-javascript = "0.20.4"
2022
tree-sitter-python = "0.20.4"
2123
tree-sitter-rust = "0.20.4"
24+
tree-sitter-typescript = "0.20.5"
2225
walkdir = "2.4.0"
2326

2427
[dev-dependencies]

sqlshield/src/finder/go.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
//! Extract SQL string literals from Go source.
2+
//!
3+
//! Go has two string literal forms:
4+
//!
5+
//! * Interpreted strings (`"…"`) — backslash escapes apply (`\"`, `\n`, …).
6+
//! Tree-sitter labels these `interpreted_string_literal`.
7+
//! * Raw strings (`` `…` ``) — no escape processing; backtick is the only
8+
//! forbidden character. Tree-sitter labels these `raw_string_literal`.
9+
//!
10+
//! Go has no built-in interpolation, but `fmt.Sprintf` and friends use
11+
//! `%s` / `%d` / etc. placeholders embedded inside ordinary strings.
12+
//! Replace those with `1` so the parsed SQL still tokenizes — same trick
13+
//! used by the Python finder for `{}` placeholders.
14+
15+
use std::sync::LazyLock;
16+
17+
use regex::Regex;
18+
19+
/// Match the common `fmt`-style verbs (`%s`, `%d`, `%v`, `%q`, …) that
20+
/// appear in `fmt.Sprintf("SELECT %s FROM …", col)`. `%%` is the literal-`%`
21+
/// escape and is preserved.
22+
static FORMAT_VERB_RE: LazyLock<Regex> = LazyLock::new(|| {
23+
Regex::new(r"%[+\-# 0]*\d*(?:\.\d+)?[vTtbcdoOqxXUeEfFgGspw]").expect("static regex is valid")
24+
});
25+
26+
pub fn extract_query_string_from_node(node: &tree_sitter::Node, code: &[u8]) -> Option<String> {
27+
let decoded = match node.kind() {
28+
"interpreted_string_literal" => {
29+
let inner = inner_text(node, code, '"', '"')?;
30+
decode_go_escapes(inner)
31+
}
32+
"raw_string_literal" => inner_text(node, code, '`', '`')?.to_string(),
33+
_ => return None,
34+
};
35+
36+
// Preserve `%%` as a sentinel so the verb pass doesn't see a stray `%`.
37+
const ESC_PCT: char = '\u{0001}';
38+
let escaped = decoded.replace("%%", &ESC_PCT.to_string());
39+
let substituted = FORMAT_VERB_RE.replace_all(&escaped, "1");
40+
Some(substituted.replace(ESC_PCT, "%"))
41+
}
42+
43+
fn inner_text<'a>(
44+
node: &tree_sitter::Node,
45+
code: &'a [u8],
46+
open: char,
47+
close: char,
48+
) -> Option<&'a str> {
49+
let raw = &code[node.start_byte()..node.end_byte()];
50+
let text = std::str::from_utf8(raw).ok()?;
51+
let first = text.find(open)?;
52+
let last = text.rfind(close)?;
53+
if first >= last {
54+
return None;
55+
}
56+
Some(&text[first + 1..last])
57+
}
58+
59+
/// Decode the backslash escapes found in the body of a Go interpreted
/// string literal (the text between the double quotes).
///
/// * `\"`, `\'` and `\\` become the escaped character itself.
/// * `\n`, `\t` and `\r` become the corresponding whitespace character.
/// * `\a`, `\b`, `\f` and `\v` are dropped: those control characters carry
///   no meaning inside a SQL query.
/// * Everything else (`\xNN`, `\uNNNN`, `\UNNNNNNNN`, and octal `\NNN`,
///   including octal escapes that start with `0`) is kept as literal text.
/// * A trailing lone backslash is kept as-is.
fn decode_go_escapes(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some(escaped @ ('"' | '\\' | '\'')) => out.push(escaped),
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            // Control escapes are dropped. `0` is deliberately NOT in this
            // set: Go has no standalone `\0` escape, so a `\0` is always the
            // start of a three-digit octal escape and must stay intact.
            Some('a' | 'b' | 'f' | 'v') => {}
            // `\xNN`, `\uNNNN`, `\UNNNNNNNN`, octal: keep the literal text.
            // Half-decoding adds risk without value for linting.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}

sqlshield/src/finder/javascript.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
//! Extract SQL string literals from JavaScript and TypeScript source.
2+
//!
3+
//! Both grammars expose the same string-literal shapes:
4+
//!
5+
//! * `string` — single- or double-quoted, with backslash escapes.
6+
//! * `template_string` — backtick-quoted; may contain `${...}`
7+
//! `template_substitution` children.
8+
//!
9+
//! For template strings we follow the Python finder's pattern: each
10+
//! `template_substitution` becomes a literal `1`, which keeps the SQL
11+
//! parsable when substitutions stand in for static values.
12+
13+
pub fn extract_query_string_from_node(node: &tree_sitter::Node, code: &[u8]) -> Option<String> {
14+
match node.kind() {
15+
// string_inner_text already decodes any escape_sequence children;
16+
// its output is the final SQL text.
17+
"string" => string_inner_text(node, code),
18+
"template_string" => Some(extract_template(node, code)),
19+
_ => None,
20+
}
21+
}
22+
23+
/// Get the text between the quotes of a `string` node. Tree-sitter's JS
24+
/// grammar emits `string_fragment` children for the raw content, so prefer
25+
/// concatenating those when present; fall back to slicing between the outer
26+
/// quote characters otherwise.
27+
fn string_inner_text(node: &tree_sitter::Node, code: &[u8]) -> Option<String> {
28+
let mut cursor = node.walk();
29+
let mut content = String::new();
30+
let mut found_fragment = false;
31+
for child in node.children(&mut cursor) {
32+
match child.kind() {
33+
"string_fragment" => {
34+
found_fragment = true;
35+
content.push_str(&String::from_utf8_lossy(
36+
&code[child.start_byte()..child.end_byte()],
37+
));
38+
}
39+
"escape_sequence" => {
40+
found_fragment = true;
41+
let raw = &code[child.start_byte()..child.end_byte()];
42+
let text = std::str::from_utf8(raw).ok()?;
43+
// Decode the single escape inline so the caller's pass is a
44+
// no-op for whatever we produce here.
45+
content.push_str(&decode_js_escapes(text));
46+
}
47+
_ => {}
48+
}
49+
}
50+
if found_fragment {
51+
return Some(content);
52+
}
53+
// Empty string or older grammar shape: slice between the outer quotes.
54+
let raw = &code[node.start_byte()..node.end_byte()];
55+
let text = std::str::from_utf8(raw).ok()?;
56+
let bytes = text.as_bytes();
57+
let quote = bytes.first().copied()?;
58+
if quote != b'"' && quote != b'\'' {
59+
return None;
60+
}
61+
let first = text.find(quote as char)?;
62+
let last = text.rfind(quote as char)?;
63+
if first >= last {
64+
return None;
65+
}
66+
Some(text[first + 1..last].to_string())
67+
}
68+
69+
fn extract_template(node: &tree_sitter::Node, code: &[u8]) -> String {
70+
let mut cursor = node.walk();
71+
let mut out = String::new();
72+
for child in node.children(&mut cursor) {
73+
match child.kind() {
74+
"string_fragment" => {
75+
out.push_str(&String::from_utf8_lossy(
76+
&code[child.start_byte()..child.end_byte()],
77+
));
78+
}
79+
"escape_sequence" => {
80+
let raw = &code[child.start_byte()..child.end_byte()];
81+
if let Ok(text) = std::str::from_utf8(raw) {
82+
out.push_str(&decode_js_escapes(text));
83+
}
84+
}
85+
"template_substitution" => {
86+
// Replace ${...} with `1` — same trick as the Python finder.
87+
out.push('1');
88+
}
89+
_ => {}
90+
}
91+
}
92+
out
93+
}
94+
95+
/// Decode the backslash escapes of a JavaScript/TypeScript string or
/// template literal fragment.
///
/// * `\"`, `\'`, `` \` `` and `\\` become the escaped character itself.
/// * `\n`, `\t` and `\r` become the corresponding whitespace character.
/// * A line continuation (backslash followed by LF, CR, CRLF, U+2028 or
///   U+2029) produces nothing, matching how JavaScript evaluates it; leaving
///   the backslash in place would corrupt multi-line SQL strings.
/// * `\0` becomes NUL unless an octal digit follows, in which case it is the
///   start of a legacy octal escape and is kept as literal text.
/// * Everything else (`\b`, `\f`, `\v`, `\xNN`, `\uNNNN`, `\u{...}`, octal)
///   is kept as literal text.
/// * A trailing lone backslash is kept as-is.
fn decode_js_escapes(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some(escaped @ ('"' | '\'' | '`' | '\\')) => out.push(escaped),
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            // Line continuation: the backslash and the line terminator are
            // both removed from the resulting string value.
            Some('\n' | '\u{2028}' | '\u{2029}') => {}
            Some('\r') => {
                // CRLF counts as a single line terminator.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
            }
            Some('0') => {
                if matches!(chars.peek().copied(), Some('0'..='7')) {
                    // Legacy octal escape such as `\012`: keep it literal.
                    // The remaining digits are copied by later iterations.
                    out.push_str("\\0");
                } else {
                    out.push('\0');
                }
            }
            // `\b`, `\f`, `\v`, `\xNN`, `\uNNNN`, `\u{...}`, octal: keep
            // literal text. Half-decoding adds risk without payoff.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}

sqlshield/src/finder/mod.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
//! Locates SQL strings inside source files by walking a tree-sitter AST.
22
3+
mod go;
4+
mod javascript;
35
mod python;
46
mod rust;
57

@@ -13,7 +15,7 @@ pub struct QueryInCode {
1315
pub statements: Vec<sqlparser::ast::Statement>,
1416
}
1517

16-
pub const SUPPORTED_CODE_FILE_EXTENSIONS: [&str; 2] = ["py", "rs"];
18+
pub const SUPPORTED_CODE_FILE_EXTENSIONS: [&str; 6] = ["py", "rs", "go", "ts", "tsx", "js"];
1719

1820
pub fn find_queries_in_file(file_path: &Path) -> Result<Vec<QueryInCode>> {
1921
let dialect = sqlparser::dialect::GenericDialect {};
@@ -59,6 +61,25 @@ pub fn find_queries_in_code_with_dialect(
5961
tree_sitter_rust::language(),
6062
rust::extract_query_string_from_node,
6163
),
64+
"go" => (
65+
tree_sitter_go::language(),
66+
go::extract_query_string_from_node,
67+
),
68+
"js" => (
69+
tree_sitter_javascript::language(),
70+
javascript::extract_query_string_from_node,
71+
),
72+
// TypeScript and TSX share node kinds with JavaScript for the
73+
// string-literal shapes we care about, so the extractor is the
74+
// same; only the grammar changes.
75+
"ts" => (
76+
tree_sitter_typescript::language_typescript(),
77+
javascript::extract_query_string_from_node,
78+
),
79+
"tsx" => (
80+
tree_sitter_typescript::language_tsx(),
81+
javascript::extract_query_string_from_node,
82+
),
6283
other => return Err(SqlShieldError::UnsupportedFileExtension(other.to_string())),
6384
};
6485

0 commit comments

Comments
 (0)