Skip to content

Commit b6fb4f2

Browse files
davidsmfreire and claude committed
fix: handle .format() {{ }} escapes in Python string extractor
The lazy `\{.*?\}` substitution matches leftmost-first, so on `{{key}}` it consumed `{{key}` and left a stray `}` that broke the SQL parse. Any query that referenced JSON-like literals (`'{{tag}}'`) or simply documented braces in a comment would be silently dropped. The fix pre-escapes `{{`/`}}` with sentinel characters (`\u{0001}` / `\u{0002}`) before running the regex, then restores them as literal `{`/`}`. Single placeholders `{x}` continue to substitute as `1`. Three tests cover the single placeholder (no regression), the bare double-brace literal, and a mix of escapes adjacent to a real placeholder. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 13ec935 commit b6fb4f2

2 files changed

Lines changed: 64 additions & 6 deletions

File tree

sqlshield/src/finder/python.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,18 @@ pub fn extract_query_string_from_node(node: &tree_sitter::Node, code: &[u8]) ->
2626
}
2727
}
2828

29-
// If it isn't an fstring, but has interpolations to be formatted with .format later
30-
// tree_sitter will not store the interpolation node and the code above won't clean it
31-
content_as_string = INTERPOLATION_RE
32-
.replace_all(&content_as_string, "1")
33-
.to_string();
29+
// For `.format()`-style strings tree-sitter doesn't yield interpolation
30+
// nodes, so we sweep `{...}` placeholders here. `.format()` uses `{{`
31+
// and `}}` as literal braces — pre-escape them with sentinel bytes so
32+
// the lazy regex doesn't eat the doubled-up form (which would leave a
33+
// stray `}` and break the SQL parse), then restore single braces after.
34+
const ESC_OPEN: char = '\u{0001}';
35+
const ESC_CLOSE: char = '\u{0002}';
36+
let escaped = content_as_string
37+
.replace("{{", &ESC_OPEN.to_string())
38+
.replace("}}", &ESC_CLOSE.to_string());
39+
let substituted = INTERPOLATION_RE.replace_all(&escaped, "1");
40+
let restored = substituted.replace(ESC_OPEN, "{").replace(ESC_CLOSE, "}");
3441

35-
Some(content_as_string)
42+
Some(restored)
3643
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//! Python string-template extraction edge cases — escaped braces in
2+
//! `.format()` strings (no f-string cases are exercised in this file).
3+
4+
use sqlshield::finder::find_queries_in_code;
5+
6+
/// Test helper: runs `find_queries_in_code` over `source` (as bytes) with the
/// `"py"` language tag and returns the `statements` of every query it found,
/// one inner `Vec` per query, in the order they were returned.
///
/// Panics via `unwrap` if `find_queries_in_code` does not succeed, which fails
/// the calling test immediately.
fn extract(source: &str) -> Vec<Vec<sqlparser::ast::Statement>> {
7+
find_queries_in_code(source.as_bytes(), "py")
8+
.unwrap()
9+
.into_iter()
10+
.map(|q| q.statements)
11+
.collect()
12+
}
13+
14+
/// Regression guard: a lone `{x}` placeholder in a `.format()` string must
/// still be swapped for `1`, yielding exactly one query whose first statement
/// renders with `id = 1`.
#[test]
15+
fn single_placeholder_is_replaced_with_literal_one() {
16+
// Existing behavior: `{x}` substitutes for a value-position placeholder.
17+
let source = r#"q = "SELECT name FROM users WHERE id = {x}".format(x=1)"#;
18+
let queries = extract(source);
19+
assert_eq!(queries.len(), 1);
20+
// The query should parse and contain the substituted `1`.
21+
let rendered = queries[0][0].to_string();
22+
assert!(
23+
rendered.contains("id = 1"),
24+
"expected substitution; got: {rendered}"
25+
);
26+
}
27+
28+
/// An escaped `{{key}}` inside a SQL string literal must survive extraction
/// as the literal `{key}`: exactly one query is expected, and its first
/// statement must render with `'{key}'`.
#[test]
29+
fn double_brace_is_preserved_as_literal_brace() {
30+
// `.format()` uses `{{` for a literal `{`. Without escape handling, the
31+
// lazy regex matches `{{key}` and leaves a stray `}`, breaking the parse.
32+
let source = r#"q = "SELECT * FROM users WHERE config = '{{key}}'".format()"#;
33+
let queries = extract(source);
34+
assert_eq!(queries.len(), 1, "query should parse cleanly");
35+
let rendered = queries[0][0].to_string();
36+
assert!(
37+
rendered.contains("'{key}'"),
38+
"double-brace should round-trip to single brace; got: {rendered}"
39+
);
40+
}
41+
42+
/// Escaped braces and a real placeholder in the same string: `{{tag}}` must
/// come through as the literal `{tag}` while `{x}` is substituted with `1`,
/// both within the single extracted query.
#[test]
43+
fn mixed_escapes_and_placeholders() {
44+
// `{{literal}}` next to `{real}`: literal preserved, real substituted.
45+
let source = r#"q = "SELECT * FROM users WHERE meta = '{{tag}}' AND id = {x}".format(x=1)"#;
46+
let queries = extract(source);
47+
assert_eq!(queries.len(), 1);
48+
let rendered = queries[0][0].to_string();
49+
// The escaped pair collapses to single braces inside the SQL string literal.
50+
assert!(rendered.contains("'{tag}'"));
51+
// The genuine placeholder is replaced by the stand-in value.
52+
assert!(rendered.contains("id = 1"));
53+
}

0 commit comments

Comments
 (0)