Skip to content

Commit 257fc47

Browse files
davidsmfreire and claude committed
feat(schema): ingest CREATE VIEW and CREATE TABLE … AS SELECT
Real-world schema dumps lean heavily on views; the loader previously only saw CREATE TABLE, so any reference to a view was reported as "table not found" — a false positive.

Three coverage extensions in load_schema:

- CREATE VIEW with an explicit column list `(a, b)` registers those names verbatim. References to the underlying body's column names (e.g. `SELECT id FROM renamed_view` when the view does `SELECT id AS uid`) correctly fail.
- CREATE VIEW without an explicit list infers names from the body's projection — including aliased items (`SELECT id AS uid` exposes `uid`).
- CREATE TABLE … AS SELECT (CTAS): when the original CREATE TABLE has no explicit column list but a `query: Some(...)`, columns are inferred from the source query's projection, the same way they are for views.

A small `project_column_names` helper walks the Query/SetExpr tree returning owned strings (the validator-side `project_columns` returns borrowed `&str` tied to the AST lifetime, which doesn't fit schema ingestion). UNION-shaped bodies use the left-branch projection per SQL standard.

5 tests cover view-with-explicit-list (valid + invalid via body names), view-without-list, view with aliased projection, CTAS, and qualified view names (analytics.user_view).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c094006 commit 257fc47

2 files changed

Lines changed: 168 additions & 5 deletions

File tree

sqlshield/src/schema/sql.rs

Lines changed: 94 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
use sqlparser::{
2-
ast::{AlterTableOperation, ColumnDef, ObjectName, Statement},
2+
ast::{
3+
AlterTableOperation, ColumnDef, Expr, ObjectName, Query, SelectItem, SetExpr, Statement,
4+
ViewColumnDef,
5+
},
36
dialect::GenericDialect,
47
parser::Parser,
58
};
@@ -16,14 +19,27 @@ pub fn load_schema(schema: &[u8]) -> Result<super::TablesAndColumns> {
1619
let mut tables: HashMap<String, HashSet<String>> = HashMap::new();
1720
for statement in statements {
1821
match statement {
19-
Statement::CreateTable { columns, name, .. } => {
20-
ingest_create_table(&name, &columns, &mut tables);
22+
Statement::CreateTable {
23+
columns,
24+
name,
25+
query,
26+
..
27+
} => {
28+
ingest_create_table(&name, &columns, query.as_deref(), &mut tables);
2129
}
2230
Statement::AlterTable {
2331
name, operations, ..
2432
} => {
2533
apply_alters(&name, &operations, &mut tables);
2634
}
35+
Statement::CreateView {
36+
name,
37+
columns,
38+
query,
39+
..
40+
} => {
41+
ingest_create_view(&name, &columns, &query, &mut tables);
42+
}
2743
_ => {}
2844
}
2945
}
@@ -33,13 +49,23 @@ pub fn load_schema(schema: &[u8]) -> Result<super::TablesAndColumns> {
3349
fn ingest_create_table(
3450
name: &ObjectName,
3551
columns: &[ColumnDef],
52+
query: Option<&Query>,
3653
tables: &mut HashMap<String, HashSet<String>>,
3754
) {
3855
let Some(last_ident) = name.0.last() else {
3956
return;
4057
};
41-
let columns_set: HashSet<String> =
42-
HashSet::from_iter(columns.iter().map(|e| e.name.value.clone()));
58+
59+
// CREATE TABLE … AS SELECT carries no explicit column list; infer the
60+
// names from the source query's projection. Plain CREATE TABLE uses
61+
// the explicit list. If both are present, the explicit list wins.
62+
let columns_set: HashSet<String> = if !columns.is_empty() {
63+
columns.iter().map(|e| e.name.value.clone()).collect()
64+
} else if let Some(q) = query {
65+
project_column_names(q).into_iter().collect()
66+
} else {
67+
HashSet::new()
68+
};
4369

4470
// Store the bare table name so unqualified queries resolve; if the
4571
// schema was declared as `schema.table`, ALSO store the fully
@@ -50,6 +76,69 @@ fn ingest_create_table(
5076
}
5177
}
5278

79+
fn ingest_create_view(
80+
name: &ObjectName,
81+
columns: &[ViewColumnDef],
82+
query: &Query,
83+
tables: &mut HashMap<String, HashSet<String>>,
84+
) {
85+
let Some(last_ident) = name.0.last() else {
86+
return;
87+
};
88+
// Explicit column list `CREATE VIEW v(a, b) AS …` overrides whatever
89+
// names the body projects.
90+
let columns_set: HashSet<String> = if !columns.is_empty() {
91+
columns.iter().map(|c| c.name.value.clone()).collect()
92+
} else {
93+
project_column_names(query).into_iter().collect()
94+
};
95+
tables.insert(last_ident.value.clone(), columns_set.clone());
96+
if name.0.len() > 1 {
97+
tables.insert(display_name(name), columns_set);
98+
}
99+
}
100+
101+
/// Owned-string version of `validation::project_columns`. Used at schema-
102+
/// ingestion time to capture the column names that a view or CTAS body
103+
/// projects.
104+
fn project_column_names(query: &Query) -> Vec<String> {
105+
project_names_of_body(query.body.as_ref())
106+
}
107+
108+
fn project_names_of_body(body: &SetExpr) -> Vec<String> {
109+
let mut names = Vec::new();
110+
match body {
111+
SetExpr::Select(select_box) => {
112+
for item in &select_box.projection {
113+
match item {
114+
SelectItem::UnnamedExpr(expr) => match expr {
115+
Expr::Identifier(ident) => names.push(ident.value.clone()),
116+
Expr::CompoundIdentifier(idents) => {
117+
if let Some(last) = idents.last() {
118+
names.push(last.value.clone());
119+
}
120+
}
121+
_ => {}
122+
},
123+
SelectItem::ExprWithAlias { alias, .. } => {
124+
names.push(alias.value.clone());
125+
}
126+
_ => {}
127+
}
128+
}
129+
}
130+
// For UNION/etc., the output names are taken from the left branch.
131+
SetExpr::SetOperation { left, .. } => {
132+
names.extend(project_names_of_body(left.as_ref()));
133+
}
134+
SetExpr::Query(inner) => {
135+
names.extend(project_column_names(inner.as_ref()));
136+
}
137+
_ => {}
138+
}
139+
names
140+
}
141+
53142
fn apply_alters(
54143
name: &ObjectName,
55144
operations: &[AlterTableOperation],

sqlshield/tests/schema_views.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//! Schema ingestion: CREATE VIEW (with + without explicit column list)
2+
//! and CREATE TABLE … AS SELECT.
3+
4+
use sqlshield::validate_query;
5+
6+
/// A view declared with an explicit column list exposes exactly those names,
/// and hides the names used inside its body.
#[test]
fn view_with_explicit_column_list_publishes_those_names() {
    let schema = "
        CREATE TABLE users (id INT, name VARCHAR(255));
        CREATE VIEW user_summary (uid, full_name) AS
            SELECT id, name FROM users;
    ";

    // Names from the explicit list resolve without complaints.
    let valid = validate_query("SELECT uid, full_name FROM user_summary", schema).unwrap();
    assert!(valid.is_empty(), "got: {valid:?}");

    // The body's own column name is not reachable through the view, so the
    // validator must report it.
    let invalid = validate_query("SELECT id FROM user_summary", schema).unwrap();
    let reports_id = invalid.iter().any(|e| e.contains("`id`"));
    assert!(reports_id, "got: {invalid:?}");
}
24+
25+
/// Without an explicit column list, a view exposes the names its body
/// projects — and nothing else.
#[test]
fn view_without_explicit_columns_infers_from_body() {
    let schema = "
        CREATE TABLE users (id INT, name VARCHAR(255));
        CREATE VIEW active_users AS SELECT id, name FROM users;
    ";

    // Both projected columns are known.
    let valid = validate_query("SELECT id, name FROM active_users", schema).unwrap();
    assert!(valid.is_empty(), "got: {valid:?}");

    // A column the body never projects is flagged.
    let invalid = validate_query("SELECT email FROM active_users", schema).unwrap();
    let reports_email = invalid.iter().any(|e| e.contains("`email`"));
    assert!(reports_email, "got: {invalid:?}");
}
40+
41+
/// An aliased projection item publishes the alias, not the source column.
#[test]
fn view_with_aliased_projection() {
    // `SELECT id AS uid` projects `uid` (alias) — the view's column should be
    // `uid`, not the underlying `id`.
    let schema = "
        CREATE TABLE users (id INT);
        CREATE VIEW renamed AS SELECT id AS uid FROM users;
    ";
    let valid = validate_query("SELECT uid FROM renamed", schema).unwrap();
    assert!(valid.is_empty(), "got: {valid:?}");

    // Pin the other half of the claim above: the pre-alias name must not
    // leak through the view.
    let invalid = validate_query("SELECT id FROM renamed", schema).unwrap();
    assert!(
        invalid.iter().any(|e| e.contains("`id`")),
        "got: {invalid:?}"
    );
}
52+
53+
/// `CREATE TABLE … AS SELECT` exposes the columns projected by its source
/// query, and only those.
#[test]
fn create_table_as_select_publishes_projected_columns() {
    let schema = "
        CREATE TABLE users (id INT, name VARCHAR(255));
        CREATE TABLE archive AS SELECT id, name FROM users;
    ";
    let valid = validate_query("SELECT id, name FROM archive", schema).unwrap();
    assert!(valid.is_empty(), "got: {valid:?}");

    // A column the source query does not project must be reported. Include
    // the returned messages on failure, as the sibling tests do.
    let invalid = validate_query("SELECT email FROM archive", schema).unwrap();
    assert!(
        invalid.iter().any(|e| e.contains("`email`")),
        "got: {invalid:?}"
    );
}
65+
66+
/// A view created under a qualified name can be queried by that qualified
/// name.
#[test]
fn view_can_be_referenced_by_qualified_name() {
    let schema = "
        CREATE TABLE users (id INT);
        CREATE VIEW analytics.user_view AS SELECT id FROM users;
    ";
    let query = "SELECT id FROM analytics.user_view";

    let valid = validate_query(query, schema).unwrap();
    assert!(valid.is_empty(), "got: {valid:?}");
}

0 commit comments

Comments
 (0)