From ec9079d9c9ee4c4d63c11f34f4732fec46560725 Mon Sep 17 00:00:00 2001 From: Matthew Bucci Date: Mon, 30 Oct 2023 12:19:10 -0700 Subject: [PATCH] don't split on ',' if within a function --- .gitignore | 2 +- pg4wp/rewriters/SelectSQLRewriter.php | 64 +++++++++++++++++++-------- phpunit.xml | 11 +++++ tests/rewriteTest.php | 34 ++++++++++++++ tests/stubs/select-com_1698679872.txt | 2 +- tests/stubs/select-com_1698679886.txt | 2 +- tests/verifyAgainstStubsTest.php | 9 +++- 7 files changed, 100 insertions(+), 24 deletions(-) create mode 100644 phpunit.xml create mode 100644 tests/rewriteTest.php diff --git a/.gitignore b/.gitignore index 416bb53..9de4a71 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ logs -tests/tools/.phpunit.result.cache \ No newline at end of file +.phpunit.result.cache \ No newline at end of file diff --git a/pg4wp/rewriters/SelectSQLRewriter.php b/pg4wp/rewriters/SelectSQLRewriter.php index 6f68f17..e56589a 100644 --- a/pg4wp/rewriters/SelectSQLRewriter.php +++ b/pg4wp/rewriters/SelectSQLRewriter.php @@ -16,11 +16,11 @@ class SelectSQLRewriter extends AbstractSQLRewriter error_log('[' . microtime(true) . "] Number of rows required for :\n$sql\n---------------------\n", 3, PG4WP_LOG . 'pg4wp_NUMROWS.log'); } } - + if(false !== strpos($sql, 'FOUND_ROWS()')) { // Here we convert the latest query into a COUNT query $sql = $GLOBALS['pg4wp_numrows_query']; - + // Remove the LIMIT clause if it exists $sql = preg_replace('/\s+LIMIT\s+\d+(\s*,\s*\d+)?/i', '', $sql); @@ -232,6 +232,26 @@ class SelectSQLRewriter extends AbstractSQLRewriter return $postgresSql; } + /** + * Transforms a given SQL query to include a GROUP BY clause if the SELECT statement has both aggregate + * and non-aggregate columns. This function is specifically designed to work with PostgreSQL. + * + * In PostgreSQL, a query that uses aggregate functions must group by all columns in the SELECT list that + * are not part of the aggregate functions. Failing to do so results in a syntax error. This function + * automatically adds a GROUP BY clause to meet this PostgreSQL requirement when both aggregate (COUNT, SUM, + * AVG, MIN, MAX) and non-aggregate columns are present. + * + * @param string $sql The SQL query string to be transformed. + * + * @return string The transformed SQL query string with appropriate GROUP BY clause if required. + * + * @throws Exception If the SQL query cannot be parsed or modified. + * + * @example + * Input: SELECT COUNT(id), username FROM users; + * Output: SELECT COUNT(id), username FROM users GROUP BY username; + * + */ protected function ensureGroupByOrAggregate(string $sql): string { // Check for system or session variables @@ -239,7 +259,7 @@ class SelectSQLRewriter extends AbstractSQLRewriter return $sql; } - // Regular expression to capture main SQL components + // Regular expression to capture main SQL components. $regex = '/(SELECT\s+)(.*?)(\s+FROM\s+)([^ ]+)(\s+WHERE\s+.*?(?= ORDER BY | GROUP BY | LIMIT |$))?(ORDER BY.*?(?= LIMIT |$))?(LIMIT.*?$)?/is'; // Capture main SQL components using regex @@ -247,22 +267,26 @@ class SelectSQLRewriter extends AbstractSQLRewriter return $sql; } - $selectClause = $matches[2] ?? ''; - $fromClause = $matches[4] ?? ''; - $whereClause = $matches[5] ?? ''; - $orderClause = $matches[6] ?? ''; - $limitClause = $matches[7] ?? ''; + $selectClause = trim($matches[2]) ?? ''; + $fromClause = trim($matches[4]) ?? ''; + $whereClause = trim($matches[5]) ?? ''; + $orderClause = trim($matches[6]) ?? ''; + $limitClause = trim($matches[7]) ?? ''; if (empty($selectClause) || empty($fromClause)) { return $sql; } - $columns = explode(',', $selectClause); + // Regular expression to match commas not within parentheses + $pattern = '/,(?![^\(]*\))/'; + // Split columns using a comma, and then trim each element + $columns = array_map('trim', preg_split($pattern, $selectClause)); + $aggregateColumns = []; $nonAggregateColumns = []; foreach ($columns as $col) { - $col = trim($col); + // Check for aggregate functions in the column if (preg_match('/(COUNT|SUM|AVG|MIN|MAX)\s*?\(/i', $col)) { $aggregateColumns[] = $col; } else { @@ -275,23 +299,25 @@ class SelectSQLRewriter extends AbstractSQLRewriter return $sql; } - $groupByClause = "GROUP BY " . implode(", ", $nonAggregateColumns); // Assemble new SQL query $postgresSql = "SELECT $selectClause FROM $fromClause"; - if (!empty(trim($whereClause))) { - $postgresSql .= " $whereClause"; + if (!empty($whereClause)) { + $postgresSql .= ' ' . $whereClause; } - $postgresSql .= " $groupByClause"; - - if (!empty(trim($orderClause))) { - $postgresSql .= " $orderClause"; + $groupByClause = "GROUP BY " . implode(", ", $nonAggregateColumns); + if (!empty($groupByClause)) { + $postgresSql .= ' ' . $groupByClause; } - if (!empty(trim($limitClause))) { - $postgresSql .= " $limitClause"; + if (!empty($orderClause)) { + $postgresSql .= ' ' . $orderClause; + } + + if (!empty($limitClause)) { + $postgresSql .= ' ' . $limitClause; } return $postgresSql; diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..02f5990 --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,11 @@ + + + + + tests + + + \ No newline at end of file diff --git a/tests/rewriteTest.php b/tests/rewriteTest.php new file mode 100644 index 0000000..3328587 --- /dev/null +++ b/tests/rewriteTest.php @@ -0,0 +1,34 @@ +assertSame($postgresql, $expected); + } + + + public function test_it_adds_group_by() + { + + $sql = 'SELECT COUNT(id), username FROM users'; + $expected = 'SELECT COUNT(id) AS count0, username FROM users GROUP BY username'; + $postgresql = pg4wp_rewrite($sql); + $this->assertSame($postgresql, $expected); + } +} \ No newline at end of file diff --git a/tests/stubs/select-com_1698679872.txt b/tests/stubs/select-com_1698679872.txt index e57aec9..a0e8cbc 100644 --- a/tests/stubs/select-com_1698679872.txt +++ b/tests/stubs/select-com_1698679872.txt @@ -1 +1 @@ -{"mysql":"SELECT comment_post_ID, COUNT(comment_ID) as num_comments FROM wp_comments WHERE comment_post_ID IN ( '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY comment_post_ID","postgresql":"SELECT \"comment_post_ID\" , COUNT(\"comment_ID\" ) as num_comments FROM wp_comments WHERE \"comment_post_ID\" IN ( '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY \"comment_post_ID\" "} \ No newline at end of file +{"mysql":"SELECT comment_post_ID, COUNT(comment_ID) as num_comments FROM wp_comments WHERE comment_post_ID IN ( '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY comment_post_ID","postgresql":"SELECT \"comment_post_ID\" , COUNT(\"comment_ID\" ) as num_comments FROM wp_comments WHERE \"comment_post_ID\" IN ( '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY \"comment_post_ID\" "} \ No newline at end of file diff --git a/tests/stubs/select-com_1698679886.txt b/tests/stubs/select-com_1698679886.txt index 5d03dc3..be2468e 100644 --- a/tests/stubs/select-com_1698679886.txt +++ b/tests/stubs/select-com_1698679886.txt @@ -1 +1 @@ -{"mysql":"SELECT comment_post_ID, COUNT(comment_ID) as num_comments FROM wp_comments WHERE comment_post_ID IN ( '55', '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY comment_post_ID","postgresql":"SELECT \"comment_post_ID\" , COUNT(\"comment_ID\" ) as num_comments FROM wp_comments WHERE \"comment_post_ID\" IN ( '55', '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY \"comment_post_ID\" "} \ No newline at end of file +{"mysql":"SELECT comment_post_ID, COUNT(comment_ID) as num_comments FROM wp_comments WHERE comment_post_ID IN ( '55', '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY comment_post_ID","postgresql":"SELECT \"comment_post_ID\" , COUNT(\"comment_ID\" ) as num_comments FROM wp_comments WHERE \"comment_post_ID\" IN ( '55', '51', '17', '11', '7' ) AND comment_approved = '0' GROUP BY \"comment_post_ID\" "} \ No newline at end of file diff --git a/tests/verifyAgainstStubsTest.php b/tests/verifyAgainstStubsTest.php index 626e6dd..3b21b5d 100644 --- a/tests/verifyAgainstStubsTest.php +++ b/tests/verifyAgainstStubsTest.php @@ -1,8 +1,13 @@