Skip to content

Commit dfdada9

Browse files
davidsmfreire and claude committed
feat: case-insensitive identifier matching
`SELECT Id FROM Users` against a schema declaring `id`/`users` was reported as missing. SQL dialects vary on case sensitivity (Postgres folds to lower, ANSI to upper, BigQuery is strict, etc.), and the case mismatch produces noisy false positives, so default to ASCII case-insensitive comparisons throughout.

Implementation:

- schema/sql introduces a `lc()` helper and case-folds every name it inserts into the `tables` map (CREATE TABLE, CREATE VIEW, CTAS, ALTER TABLE add/drop/rename column, qualified twins, the bare key).
- Lookup sites case-fold their input before consulting the map: asserts::is_relation_in_schema, table_ref::resolve_table_columns, column_in_relation, and the INSERT/UPDATE column-list checks.
- The borrowed-`&str` extras map (CTE / derived-table aliases tied to AST lifetimes) is searched case-insensitively via three small helpers in asserts: extras_contains, extras_get, set_contains_ci.
- resolve_qualified now compares `rel.qualifier()` to the user's qualifier with `eq_ignore_ascii_case`.

Error messages preserve the user's original casing — ``Column `BOGUS` not found in table `users` `` — only matching is folded.

The quoted-vs-unquoted distinction (`"Id"` vs `id` in Postgres) is not modeled; sqlshield treats both as the same identifier. Per-dialect folding rules can layer on top later.

The schema module's `sql` submodule is now `pub(crate)` so the `lc` helper can be imported from validation. Otherwise unchanged.

8 tests cover lower-vs-upper schemas, mixed-case qualifiers, CTE references, ALTER TABLE, schema-qualified tables, INSERT target column lists, and a confirmatory test that genuinely missing columns still error (with the original casing in the message).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 257fc47 commit dfdada9

8 files changed

Lines changed: 146 additions & 30 deletions

File tree

sqlshield/src/schema/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Parses schema definitions into the `TablesAndColumns` map consumed by validation.
22
3-
mod sql;
3+
pub(crate) mod sql;
44

55
use std::{
66
collections::{HashMap, HashSet},

sqlshield/src/schema/sql.rs

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,24 @@ fn ingest_create_table(
6060
// names from the source query's projection. Plain CREATE TABLE uses
6161
// the explicit list. If both are present, the explicit list wins.
6262
let columns_set: HashSet<String> = if !columns.is_empty() {
63-
columns.iter().map(|e| e.name.value.clone()).collect()
63+
columns.iter().map(|e| lc(&e.name.value)).collect()
6464
} else if let Some(q) = query {
65-
project_column_names(q).into_iter().collect()
65+
project_column_names(q)
66+
.into_iter()
67+
.map(|s| lc(&s))
68+
.collect()
6669
} else {
6770
HashSet::new()
6871
};
6972

7073
// Store the bare table name so unqualified queries resolve; if the
7174
// schema was declared as `schema.table`, ALSO store the fully
7275
// qualified form so qualified queries can be resolved strictly.
73-
tables.insert(last_ident.value.clone(), columns_set.clone());
76+
// Both keys are case-folded: identifier matching is ASCII case-insensitive
77+
// throughout sqlshield.
78+
tables.insert(lc(&last_ident.value), columns_set.clone());
7479
if name.0.len() > 1 {
75-
tables.insert(display_name(name), columns_set);
80+
tables.insert(lc(&display_name(name)), columns_set);
7681
}
7782
}
7883

@@ -88,16 +93,26 @@ fn ingest_create_view(
8893
// Explicit column list `CREATE VIEW v(a, b) AS …` overrides whatever
8994
// names the body projects.
9095
let columns_set: HashSet<String> = if !columns.is_empty() {
91-
columns.iter().map(|c| c.name.value.clone()).collect()
96+
columns.iter().map(|c| lc(&c.name.value)).collect()
9297
} else {
93-
project_column_names(query).into_iter().collect()
98+
project_column_names(query)
99+
.into_iter()
100+
.map(|s| lc(&s))
101+
.collect()
94102
};
95-
tables.insert(last_ident.value.clone(), columns_set.clone());
103+
tables.insert(lc(&last_ident.value), columns_set.clone());
96104
if name.0.len() > 1 {
97-
tables.insert(display_name(name), columns_set);
105+
tables.insert(lc(&display_name(name)), columns_set);
98106
}
99107
}
100108

109+
/// Returns an owned copy of `s` with every ASCII uppercase letter replaced
/// by its lowercase counterpart; all other characters are left untouched.
///
/// This is the single folding rule applied to identifiers both when they
/// are stored in the schema map and when they are looked up, which is what
/// makes the two sides compare case-insensitively.
pub(crate) fn lc(s: &str) -> String {
    let mut folded = s.to_owned();
    folded.make_ascii_lowercase();
    folded
}
115+
101116
/// Owned-string version of `validation::project_columns`. Used at schema-
102117
/// ingestion time to capture the column names that a view or CTAS body
103118
/// projects.
@@ -162,10 +177,10 @@ fn target_keys(name: &ObjectName, tables: &HashMap<String, HashSet<String>>) ->
162177
let Some(last) = name.0.last() else {
163178
return Vec::new();
164179
};
165-
let bare = last.value.as_str();
180+
let bare = lc(&last.value);
166181
if name.0.len() > 1 {
167182
// Qualified ALTER: target only the exact qualified key.
168-
let q = display_name(name);
183+
let q = lc(&display_name(name));
169184
if tables.contains_key(&q) {
170185
return vec![q];
171186
}
@@ -180,7 +195,7 @@ fn target_keys(name: &ObjectName, tables: &HashMap<String, HashSet<String>>) ->
180195
k.as_str() == bare
181196
|| k.rsplit('.')
182197
.next()
183-
.is_some_and(|seg| seg == bare && k != &bare)
198+
.is_some_and(|seg| seg == bare && k.as_str() != bare)
184199
})
185200
.cloned()
186201
.collect()
@@ -189,17 +204,17 @@ fn target_keys(name: &ObjectName, tables: &HashMap<String, HashSet<String>>) ->
189204
fn apply_one(cols: &mut HashSet<String>, op: &AlterTableOperation) {
190205
match op {
191206
AlterTableOperation::AddColumn { column_def, .. } => {
192-
cols.insert(column_def.name.value.clone());
207+
cols.insert(lc(&column_def.name.value));
193208
}
194209
AlterTableOperation::DropColumn { column_name, .. } => {
195-
cols.remove(column_name.value.as_str());
210+
cols.remove(lc(&column_name.value).as_str());
196211
}
197212
AlterTableOperation::RenameColumn {
198213
old_column_name,
199214
new_column_name,
200215
} => {
201-
if cols.remove(old_column_name.value.as_str()) {
202-
cols.insert(new_column_name.value.clone());
216+
if cols.remove(lc(&old_column_name.value).as_str()) {
217+
cols.insert(lc(&new_column_name.value));
203218
}
204219
}
205220
// Other ops (constraints, RLS, RENAME TABLE, …) don't change the

sqlshield/src/validation/asserts.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
use std::collections::{HashMap, HashSet};
22

33
use crate::schema;
4+
use crate::schema::sql::lc;
45

56
/// Returns `Some(name)` if the relation cannot be resolved against the schema
67
/// or CTE extras. Qualified references (`schema.table`) require an exact
78
/// qualified match; unqualified references match on the bare table name.
9+
/// Comparisons are ASCII case-insensitive.
810
pub fn is_relation_in_schema(
911
relation: &sqlparser::ast::TableFactor,
1012
schema: &schema::TablesAndColumns,
@@ -18,25 +20,48 @@ pub fn is_relation_in_schema(
1820
.map(|e| e.value.as_str())
1921
.collect::<Vec<&str>>()
2022
.join(".");
23+
let name_full_lc = lc(&name_full);
2124

2225
if name.0.len() > 1 {
23-
if schema.contains_key(&name_full) || extras.contains_key(name_full.as_str()) {
26+
if schema.contains_key(&name_full_lc) || extras_contains(extras, &name_full_lc) {
2427
return None;
2528
}
2629
return Some(name_full);
2730
}
2831

29-
let last = name
32+
let last_lc = lc(&name
3033
.0
3134
.last()
3235
.expect("sqlparser guarantees ObjectName has ≥1 ident")
33-
.value
34-
.as_str();
35-
if schema.contains_key(last) || extras.contains_key(last) {
36+
.value);
37+
if schema.contains_key(&last_lc) || extras_contains(extras, &last_lc) {
3638
return None;
3739
}
3840
Some(name_full)
3941
}
4042
_ => None,
4143
}
4244
}
45+
46+
/// Reports whether `extras` has a key equal to `key`, ignoring ASCII case.
///
/// Extras keys are `&str` slices taken from query-side identifiers (CTE /
/// derived-table aliases). They are tied to AST lifetimes, so they keep the
/// casing the user wrote and cannot be case-folded at insertion time.
///
/// An exact hash lookup is tried first: it is O(1) and succeeds whenever the
/// casing already matches. Only on a miss are the keys scanned with
/// `eq_ignore_ascii_case`.
pub(crate) fn extras_contains(extras: &HashMap<&str, HashSet<&str>>, key: &str) -> bool {
    extras.contains_key(key) || extras.keys().any(|k| k.eq_ignore_ascii_case(key))
}
52+
53+
/// Looks up `key` in `extras` ignoring ASCII case and returns the matching
/// column set, or `None` when no key matches.
///
/// A key with exactly the requested casing always wins. Without that rule,
/// two entries that differ only by case (for example CTEs named `Users` and
/// `users`) would be resolved by hash-map iteration order, which is not
/// stable between runs. The exact lookup also keeps the common
/// same-casing case at O(1); the linear case-insensitive scan runs only on
/// a miss.
pub(crate) fn extras_get<'a>(
    extras: &'a HashMap<&'a str, HashSet<&'a str>>,
    key: &str,
) -> Option<&'a HashSet<&'a str>> {
    extras.get(key).or_else(|| {
        extras
            .iter()
            .find(|(name, _)| name.eq_ignore_ascii_case(key))
            .map(|(_, columns)| columns)
    })
}
63+
64+
/// Reports whether `set` holds an entry equal to `needle`, ignoring ASCII
/// case.
///
/// The exact-case hash lookup is tried first, so a needle written with the
/// same casing as the stored entry is answered in O(1). The set is scanned
/// with `eq_ignore_ascii_case` only when that lookup misses.
pub(crate) fn set_contains_ci(set: &HashSet<&str>, needle: &str) -> bool {
    set.contains(needle) || set.iter().any(|s| s.eq_ignore_ascii_case(needle))
}

sqlshield/src/validation/clauses/insert.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use sqlparser::ast::{Ident, ObjectName};
44

5+
use crate::schema::sql::lc;
56
use crate::schema::TablesAndColumns;
67

78
use super::table_ref::{display_name, resolve_table_columns};
@@ -22,7 +23,7 @@ pub(crate) fn validate_insert(
2223
};
2324

2425
for col in columns {
25-
if !cols.contains(col.value.as_str()) {
26+
if !cols.contains(&lc(&col.value)) {
2627
errors.push(format!(
2728
"Column `{}` not found in table `{}`",
2829
col.value,

sqlshield/src/validation/clauses/select.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use sqlparser::ast::{
55
SelectItem, TableFactor, TableWithJoins,
66
};
77

8+
use crate::schema::sql::lc;
89
use crate::{schema, validation::asserts};
910

1011
use super::ClauseValidation;
@@ -177,17 +178,20 @@ fn collect_from_factor<'a>(factor: &'a TableFactor, out: &mut Vec<VisibleRelatio
177178
/// the real schema or CTE-derived extras). Returns `Some(true)` if yes,
178179
/// `Some(false)` if the relation is known but the column isn't, and `None`
179180
/// if the relation is entirely unknown (caller should not over-report).
181+
/// All identifier comparisons are ASCII case-insensitive.
180182
fn column_in_relation(
181183
col: &str,
182184
rel: &VisibleRelation<'_>,
183185
schema: &schema::TablesAndColumns,
184186
extras: &HashMap<&str, HashSet<&str>>,
185187
) -> Option<bool> {
186-
if let Some(cols) = schema.get(rel.name) {
187-
return Some(cols.contains(col));
188+
let rel_name_lc = lc(rel.name);
189+
let col_lc = lc(col);
190+
if let Some(cols) = schema.get(&rel_name_lc) {
191+
return Some(cols.contains(&col_lc));
188192
}
189-
if let Some(cols) = extras.get(rel.name) {
190-
return Some(cols.contains(col));
193+
if let Some(cols) = asserts::extras_get(extras, rel.name) {
194+
return Some(asserts::set_contains_ci(cols, col));
191195
}
192196
None
193197
}
@@ -230,7 +234,9 @@ fn resolve_qualified(
230234
schema: &schema::TablesAndColumns,
231235
extras: &HashMap<&str, HashSet<&str>>,
232236
) -> Option<String> {
233-
let matched = relations.iter().find(|r| r.qualifier() == qualifier)?;
237+
let matched = relations
238+
.iter()
239+
.find(|r| r.qualifier().eq_ignore_ascii_case(qualifier))?;
234240
match column_in_relation(col, matched, schema, extras) {
235241
Some(false) => Some(format!(
236242
"Column `{col}` not found in table `{}`",

sqlshield/src/validation/clauses/table_ref.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ use std::collections::HashSet;
44

55
use sqlparser::ast::ObjectName;
66

7+
use crate::schema::sql::lc;
78
use crate::schema::TablesAndColumns;
89

910
/// Human-readable form of `ObjectName`: `public.users` or `users`.
11+
/// Preserves the user's casing for error messages.
1012
pub(crate) fn display_name(name: &ObjectName) -> String {
1113
name.0
1214
.iter()
@@ -17,13 +19,14 @@ pub(crate) fn display_name(name: &ObjectName) -> String {
1719

1820
/// Resolve a table reference to its column set, respecting qualified vs.
1921
/// unqualified lookup semantics (see `asserts::is_relation_in_schema`).
22+
/// Identifier matching is ASCII case-insensitive.
2023
pub(crate) fn resolve_table_columns<'a>(
2124
name: &ObjectName,
2225
schema: &'a TablesAndColumns,
2326
) -> Option<&'a HashSet<String>> {
2427
if name.0.len() > 1 {
25-
return schema.get(&display_name(name));
28+
return schema.get(&lc(&display_name(name)));
2629
}
2730
let last = name.0.last()?.value.as_str();
28-
schema.get(last)
31+
schema.get(&lc(last))
2932
}

sqlshield/src/validation/clauses/update.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::collections::{HashMap, HashSet};
44

55
use sqlparser::ast::{Assignment, Expr, TableFactor, TableWithJoins};
66

7+
use crate::schema::sql::lc;
78
use crate::schema::TablesAndColumns;
89
use crate::validation::asserts;
910

@@ -33,7 +34,7 @@ pub(crate) fn validate_update<'a>(
3334
let Some(last) = assignment.id.last() else {
3435
continue;
3536
};
36-
if !cols.contains(last.value.as_str()) {
37+
if !cols.contains(&lc(&last.value)) {
3738
errors.push(format!(
3839
"Column `{}` not found in table `{}`",
3940
last.value,
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//! Identifier matching is case-insensitive (ASCII fold) — the schema-vs-
2+
//! query case mismatch was a stream of false positives.
3+
4+
use sqlshield::validate_query;
5+
6+
#[test]
7+
fn lowercase_schema_uppercase_query() {
8+
let schema = "CREATE TABLE users (id INT, name VARCHAR(255));";
9+
let errs = validate_query("SELECT ID, NAME FROM USERS", schema).unwrap();
10+
assert!(errs.is_empty(), "got: {errs:?}");
11+
}
12+
13+
#[test]
14+
fn uppercase_schema_lowercase_query() {
15+
let schema = "CREATE TABLE Users (Id INT, FullName VARCHAR(255));";
16+
let errs = validate_query("SELECT id, fullname FROM users", schema).unwrap();
17+
assert!(errs.is_empty(), "got: {errs:?}");
18+
}
19+
20+
#[test]
21+
fn mixed_case_qualifier_resolves() {
22+
let schema = "CREATE TABLE users (id INT);";
23+
let errs = validate_query("SELECT U.Id FROM Users U", schema).unwrap();
24+
assert!(errs.is_empty(), "got: {errs:?}");
25+
}
26+
27+
#[test]
28+
fn truly_missing_column_still_errors() {
29+
let schema = "CREATE TABLE users (id INT);";
30+
let errs = validate_query("SELECT BOGUS FROM users", schema).unwrap();
31+
// Error message reports the column name as the user wrote it.
32+
assert!(errs.iter().any(|e| e.contains("BOGUS")), "got: {errs:?}");
33+
}
34+
35+
#[test]
36+
fn cte_reference_is_case_insensitive() {
37+
let schema = "CREATE TABLE users (id INT);";
38+
let sql = "WITH MyCTE AS (SELECT id FROM users) SELECT mycte.id FROM MYCTE";
39+
let errs = validate_query(sql, schema).unwrap();
40+
assert!(errs.is_empty(), "got: {errs:?}");
41+
}
42+
43+
#[test]
44+
fn alter_table_case_insensitive() {
45+
let schema = "
46+
CREATE TABLE users (id INT);
47+
ALTER TABLE Users ADD COLUMN Email VARCHAR(255);
48+
";
49+
let errs = validate_query("SELECT email FROM users", schema).unwrap();
50+
assert!(errs.is_empty(), "got: {errs:?}");
51+
}
52+
53+
#[test]
54+
fn schema_qualified_table_case_insensitive() {
55+
let schema = "CREATE TABLE Public.Users (id INT);";
56+
let errs = validate_query("SELECT id FROM public.users", schema).unwrap();
57+
assert!(errs.is_empty(), "got: {errs:?}");
58+
}
59+
60+
#[test]
61+
fn insert_target_columns_case_insensitive() {
62+
let schema = "CREATE TABLE users (id INT, name VARCHAR(255));";
63+
let errs = validate_query("INSERT INTO Users (ID, NAME) VALUES (1, 'a')", schema).unwrap();
64+
assert!(errs.is_empty(), "got: {errs:?}");
65+
}

0 commit comments

Comments
 (0)