athena_cli/validation.rs
1//! SQL validation module for Athena CLI.
2//!
3//! This module provides functionality to validate SQL queries before sending them to AWS Athena.
4//! It uses the sqlparser crate to parse and validate SQL syntax according to ANSI SQL standards,
5//! which is compatible with Athena's SQL dialect.
6//!
7//! ## Features
8//!
9//! - SQL syntax validation using ANSI SQL standards
10//! - Detailed error messages for syntax issues
11//! - Validation before query execution to save time and costs
12
13use anyhow::{Context, Result};
14use sqlparser::ast::{Query, SetExpr, Statement};
15use sqlparser::dialect::AnsiDialect;
16use sqlparser::parser::Parser;
17
18/// Validates the syntax of an Athena SQL query.
19///
20/// This function uses the sqlparser crate to parse the query using ANSI SQL standards,
21/// which is compatible with Athena's SQL syntax. It returns a Result that indicates
22/// whether the query syntax is valid.
23///
24/// # Arguments
25///
26/// * `query` - The SQL query string to validate
27///
28/// # Returns
29///
30/// * `Ok(())` if the query syntax is valid
31/// * `Err(anyhow::Error)` with a descriptive error message if the syntax is invalid
32///
33/// # Examples
34///
35/// ```
36/// use athena_cli::validation::validate_query_syntax;
37///
38/// // Valid query
39/// let valid_query = "SELECT * FROM my_table WHERE id = 1";
40/// assert!(validate_query_syntax(valid_query).is_ok());
41///
42/// // Invalid query with trailing comma
43/// let invalid_query = "SELECT id, FROM my_table";
44/// assert!(validate_query_syntax(invalid_query).is_err());
45/// ```
46pub fn validate_query_syntax(query: &str) -> Result<()> {
47 // Use ANSI SQL dialect for standards-compliant parsing
48 let dialect = AnsiDialect {};
49
50 // Attempt to parse the SQL query
51 match Parser::parse_sql(&dialect, query) {
52 Ok(statements) => {
53 // Additional validation for SELECT statements
54 for stmt in statements {
55 if let Statement::Query(query_box) = stmt {
56 validate_select_query(*query_box)?;
57 }
58 }
59 Ok(())
60 }
61 Err(e) => {
62 // Return a user-friendly error message
63 Err(anyhow::anyhow!("SQL syntax error: {}", e))
64 .with_context(|| format!("Failed to parse query: {}", query))
65 }
66 }
67}
68
69/// Validates a SELECT query for common issues that might not be caught by the parser.
70fn validate_select_query(query: Query) -> Result<()> {
71 // Check if it's a simple SELECT query
72 if let SetExpr::Select(select) = *query.body {
73 // Check if FROM clause is missing when it should be present
74 if select.from.is_empty() {
75 // Check if it's a special case that doesn't require FROM
76 let is_special_case = select.projection.iter().any(|proj| {
77 // Check for special cases like SELECT 1, SELECT CURRENT_DATE, etc.
78 match proj {
79 sqlparser::ast::SelectItem::UnnamedExpr(expr) => {
80 matches!(expr, sqlparser::ast::Expr::Value(_))
81 || matches!(expr, sqlparser::ast::Expr::Function(_))
82 }
83 _ => false,
84 }
85 });
86
87 if !is_special_case {
88 return Err(anyhow::anyhow!(
89 "SQL syntax error: SELECT query missing FROM clause"
90 ));
91 }
92 }
93 }
94
95 Ok(())
96}
97
98/// Checks if the query is a DDL (Data Definition Language) statement.
99///
100/// DDL statements include CREATE, ALTER, DROP, etc. This function is useful
101/// for determining if a query will modify the database schema.
102///
103/// # Arguments
104///
105/// * `query` - The SQL query string to check
106///
107/// # Returns
108///
109/// * `true` if the query is a DDL statement
110/// * `false` otherwise
111///
112/// # Examples
113///
114/// ```
115/// use athena_cli::validation::is_ddl_statement;
116///
117/// assert!(is_ddl_statement("CREATE TABLE my_table (id INT)"));
118/// assert!(is_ddl_statement("DROP TABLE my_table"));
119/// assert!(!is_ddl_statement("SELECT * FROM my_table"));
120/// ```
121/// #[allow(dead_code)]
122//pub fn is_ddl_statement(query: &str) -> bool {
123//let query_upper = query.trim().to_uppercase();
124
125//query_upper.starts_with("CREATE ") ||
126//query_upper.starts_with("ALTER ") ||
127//query_upper.starts_with("DROP ") ||
128//query_upper.starts_with("TRUNCATE ") ||
129//query_upper.starts_with("RENAME ")
130//}
131
132/// Checks if the query is a DML (Data Manipulation Language) statement.
133///
134/// DML statements include SELECT, INSERT, UPDATE, DELETE, etc. This function is useful
135/// for determining if a query will modify data in the database.
136///
137/// # Arguments
138///
139/// * `query` - The SQL query string to check
140///
141/// # Returns
142///
143/// * `true` if the query is a DML statement
144/// * `false` otherwise
145///
146/// # Examples
147///
148/// ```
149/// use athena_cli::validation::is_dml_statement;
150///
151/// assert!(is_dml_statement("SELECT * FROM my_table"));
152/// assert!(is_dml_statement("INSERT INTO my_table VALUES (1, 'test')"));
153/// assert!(!is_dml_statement("CREATE TABLE my_table (id INT)"));
154/// ```
155/// #[allow(dead_code)]
156//pub fn is_dml_statement(query: &str) -> bool {
157//let query_upper = query.trim().to_uppercase();
158
159//query_upper.starts_with("SELECT ") ||
160//query_upper.starts_with("INSERT ") ||
161//query_upper.starts_with("UPDATE ") ||
162//query_upper.starts_with("DELETE ") ||
163//query_upper.starts_with("MERGE ")
164//}
165
166#[cfg(test)]
167mod tests {
168 use super::*;
169
170 #[test]
171 fn test_valid_syntax() {
172 let valid_queries = vec![
173 "SELECT * FROM my_table",
174 "SELECT id, name FROM my_table WHERE id > 10",
175 "SELECT COUNT(*) FROM my_table GROUP BY category",
176 "CREATE TABLE my_table (id INT, name STRING)",
177 "DROP TABLE my_table",
178 "INSERT INTO my_table VALUES (1, 'test')",
179 "WITH t AS (SELECT * FROM my_table) SELECT * FROM t",
180 "SELECT 1", // Simple constant select
181 "SELECT CURRENT_DATE", // Date function
182 "SELECT NOW()", // Current timestamp function
183 ];
184
185 for query in valid_queries {
186 assert!(
187 validate_query_syntax(query).is_ok(),
188 "Query should be valid: {}",
189 query
190 );
191 }
192 }
193
194 #[test]
195 fn test_invalid_syntax() {
196 let invalid_queries = vec![
197 "SELECT * FORM my_table", // Misspelled FROM
198 "SELECT id, FROM my_table", // Extra comma
199 "SELECT * FROM my_table WHERE", // Incomplete WHERE clause
200 "CREATE TABLE my_table (id INT, name STRING", // Missing closing parenthesis
201 "DROP TABLE", // Missing table name
202 "SELECT * WHERE id = 1", // Missing FROM clause
203 ];
204
205 for query in invalid_queries {
206 assert!(
207 validate_query_syntax(query).is_err(),
208 "Query should be invalid: {}",
209 query
210 );
211 }
212 }
213
214 //#[test]
215 //fn test_ddl_detection() {
216 //assert!(is_ddl_statement("CREATE TABLE my_table (id INT)"));
217 //assert!(is_ddl_statement("DROP TABLE my_table"));
218 //assert!(is_ddl_statement("ALTER TABLE my_table ADD COLUMN new_col INT"));
219 //assert!(is_ddl_statement("TRUNCATE TABLE my_table"));
220 //assert!(is_ddl_statement("RENAME TABLE old_table TO new_table"));
221
222 //assert!(!is_ddl_statement("SELECT * FROM my_table"));
223 //assert!(!is_ddl_statement("INSERT INTO my_table VALUES (1)"));
224 //}
225
226 //#[test]
227 //fn test_dml_detection() {
228 //assert!(is_dml_statement("SELECT * FROM my_table"));
229 //assert!(is_dml_statement("INSERT INTO my_table VALUES (1)"));
230 //assert!(is_dml_statement("UPDATE my_table SET col = 1"));
231 //assert!(is_dml_statement("DELETE FROM my_table WHERE id = 1"));
232
233 //assert!(!is_dml_statement("CREATE TABLE my_table (id INT)"));
234 //assert!(!is_dml_statement("DROP TABLE my_table"));
235 //}
236}