Skip to content

Commit

Permalink
Refactored Table Lookup
Browse files Browse the repository at this point in the history
Refactored the code that walks a query's parsed tree and identies
which tables are represented in a query.
  • Loading branch information
xsist10 committed Sep 30, 2022
1 parent fb29784 commit 6a7d9ae
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 24 deletions.
68 changes: 45 additions & 23 deletions src/Engine/Entity/Query.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,39 +122,61 @@ public function linkTablesToQuery(Schema $schema, Database $database)
}

/**
* Return all the tables that are used in this query
* Return all the tables that are used in an expression
* @return array
* @throws \Exception
*/
public function getTableNamesInQuery(): array
public function findTablesInExpression($fragment): array
{
$parsed = $this->query_parser->parsed;
if (!isset($parsed['FROM'])) {
return [];
}
if (isset($fragment['expr_type'])) {
if ($fragment['expr_type'] === 'table') {
// DIGEST should always contain the `schema`.`table`
$parts = $fragment['no_quotes']['parts'];
$table = [];
$table['name'] = array_pop($parts);
if (!empty($parts)) {
$table['schema'] = array_pop($parts);
}
if (!empty($fragment['alias'])) {
$table['alias'] = $fragment['alias']['name'];
} else {
$table['alias'] = $table['name'];
}
return [ $table ];
}

$response = [];
foreach ($parsed['FROM'] as $from) {
// It's possible that the FROM is a sub-query. We'll need to deal with this soon
if (!isset($from['no_quotes'])) {
throw new \Exception("Sub-queries not handled at the moment.");
if ($fragment['expr_type'] === 'subquery') {
return $this->findTablesInExpression($fragment['sub_tree']);
}

// DIGEST should always contain the `schema`.`table`
$parts = $from['no_quotes']['parts'];
$table = [];
$table['name'] = array_pop($parts);
if (!empty($parts)) {
$table['schema'] = array_pop($parts);
if ($fragment['expr_type'] === 'table_expression') {
$tables = [];
foreach ($fragment['sub_tree'] as $subtree) {
$tables = array_merge($tables, $this->findTablesInExpression($subtree));
}

return $tables;
}
if (!empty($from['alias'])) {
$table['alias'] = $from['alias']['name'];
} else {
$table['alias'] = $table['name'];
} elseif (isset($fragment['FROM'])) {
$tables = [];
foreach ($fragment['FROM'] as $from) {
$tables = array_merge($tables, $this->findTablesInExpression($from));
}
$response[] = $table;

return $tables;
}

return $response;
// It's possible that we've encountered something we didn't prepare for
throw new \Exception("Uncertain on how to parse this query.");
}

/**
* Return all the tables that are used in this query
* @return array
*/
public function getTableNamesInQuery(): array
{
return array_unique($this->findTablesInExpression($this->query_parser->parsed), SORT_REGULAR);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/Engine/Factory.php
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ private function getEventStatementsSummary(Schema $schema, Database $database):
$query->linkTablesToQuery($schema, $database);
$schema->addQuery($query);
} catch (Exception $exception) {
$this->log()->warning("Skipped ". $querySummaryByDigest['DIGEST'] ." as subqueries aren't supported.");
$this->log()->warning("Skipping ". $querySummaryByDigest['DIGEST'] .". " . $exception->getMessage());
}
}
}
Expand Down
151 changes: 151 additions & 0 deletions tests/Engine/Entity/QueryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public function createQuery($digest)
$query->linkTablesToQuery($this->schema, $this->database);
return $query;
}

public function test__fetchColumnsModifiedByFunctions()
{
$query = $this->createQuery("SELECT * FROM users");
Expand All @@ -53,4 +54,154 @@ public function test__fetchColumnsModifiedByFunctions()
$query = $this->createQuery("SELECT * FROM test.users WHERE DATE_FORMAT(users.id)=1");
$this->assertEquals([['table' => $this->table, 'column' => $this->column]], $query->fetchColumnsModifiedByFunctions());
}

public function test__getTableNamesInQuery()
{
$query = new Query(
"SELECT
order_month,
order_day,
COUNT(DISTINCT order_id) AS num_orders,
COUNT(book_id) AS num_books,
SUM(price) AS total_price,
SUM(COUNT(book_id)) OVER (
PARTITION BY order_month
ORDER BY order_day
) AS running_total_num_books,
LAG(COUNT(book_id), 7) OVER (ORDER BY order_day) AS prev_books
FROM (
SELECT
DATE_FORMAT(co.order_date, '%Y-%m') AS order_month,
DATE_FORMAT(co.order_date, '%Y-%m-%d') AS order_day,
co.order_id,
ol.book_id,
ol.price
FROM cust_order co
INNER JOIN order_line ol ON co.order_id = ol.order_id
) sub
GROUP BY order_month, order_day
ORDER BY order_day ASC;"
);
$tables = $query->getTableNamesInQuery();
$this->assertCount(2, $tables, "Found correct number of tables");

$query = new Query(
"SELECT
DATE_FORMAT(co.order_date, '%Y-%m') AS order_month,
DATE_FORMAT(co.order_date, '%Y-%m-%d') AS order_day,
COUNT(DISTINCT co.order_id) AS num_orders,
COUNT(ol.book_id) AS num_books,
SUM(ol.price) AS total_price,
SUM(COUNT(ol.book_id)) OVER (
PARTITION BY DATE_FORMAT(co.order_date, '%Y-%m')
ORDER BY DATE_FORMAT(co.order_date, '%Y-%m-%d')
) AS running_total_num_books
FROM cust_order co
INNER JOIN order_line ol ON co.order_id = ol.order_id
GROUP BY
DATE_FORMAT(co.order_date, '%Y-%m'),
DATE_FORMAT(co.order_date, '%Y-%m-%d')
ORDER BY co.order_date ASC"
);
$tables = $query->getTableNamesInQuery();
$this->assertCount(2, $tables, "Found correct number of tables");

$query = new Query(
"SELECT
c.calendar_date,
c.calendar_year,
c.calendar_month,
c.calendar_dayname,
COUNT(DISTINCT sub.order_id) AS num_orders,
COUNT(sub.book_id) AS num_books,
SUM(sub.price) AS total_price,
SUM(COUNT(sub.book_id)) OVER (
PARTITION BY c.calendar_year, c.calendar_month
ORDER BY c.calendar_date
) AS running_total_num_books,
LAG(COUNT(sub.book_id), 7) OVER (ORDER BY c.calendar_date) AS prev_books
FROM calendar_days c
LEFT JOIN (
SELECT
DATE_FORMAT(co.order_date, '%Y-%m') AS order_month,
DATE_FORMAT(co.order_date, '%Y-%m-%d') AS order_day,
co.order_id,
ol.book_id,
ol.price
FROM cust_order co
INNER JOIN order_line ol ON co.order_id = ol.order_id
) sub ON c.calendar_date = sub.order_day
GROUP BY c.calendar_date, c.calendar_year, c.calendar_month, c.calendar_dayname
ORDER BY c.calendar_date ASC"
);
$tables = $query->getTableNamesInQuery();
$this->assertCount(3, $tables, "Found correct number of tables");


$query = new Query(
"SELECT MIN(place_id) AS place_id,
name,
administration,
country,
MAX(relevance) AS relevance
FROM (
SELECT *,
(rescale(population, mn_pop, mx_pop) * 2.4) +
(rescale(name_relevance, mn_plre, mx_plre) * 0.0) +
(rescale(distance, mx_dist, mn_dist) * 2.2) + -- inverted
if(t.country_id = (
SELECT country_id FROM country_names
WHERE name = 'Germany'
LIMIT 1
), 0.6, 0)
/ 4
AS relevance
FROM (
SELECT resl.*,
MIN(aggr.population) AS mn_pop,
MAX(aggr.population) AS mx_pop,
MIN(aggr.name_relevance) AS mn_plre,
MAX(aggr.name_relevance) AS mx_plre,
MIN(aggr.distance) AS mn_dist,
MAX(aggr.distance) AS mx_dist
FROM (
SELECT p.population,
ST_DISTANCE_SPHERE(position, ST_POINTFROMTEXT(ST_ASTEXT(POINT(7.4653, 51.5136)), 4326)) AS distance,
MATCH(pn.name) AGAINST('+dor*' IN BOOLEAN MODE) AS name_relevance
FROM places p
JOIN place_names pn ON p.id = pn.place_id
JOIN admin_names an ON p.admin_id = an.admin_id
JOIN country_names cn ON p.country_id = cn.country_id
JOIN languages l ON pn.language_id = l.id AND an.language_id = l.id AND cn.language_id = l.id
WHERE l.code_3 = 'ENG' AND MATCH(pn.name) AGAINST('+dor*' IN BOOLEAN MODE)
LIMIT 200
) aggr
JOIN (
SELECT p.id AS place_id,
pn.name AS name,
an.name AS administration,
an.abbr AS admin_abbr,
cn.name AS country,
p.population AS population,
p.country_id AS country_id,
ST_DISTANCE_SPHERE(position, ST_POINTFROMTEXT(ST_ASTEXT(POINT(7.4653, 51.5136)), 4326)) AS distance,
MATCH(pn.name) AGAINST('+rom*' IN BOOLEAN MODE) AS name_relevance
FROM places p
JOIN place_names pn ON p.id = pn.place_id
JOIN admin_names an ON p.admin_id = an.admin_id
JOIN country_names cn ON p.country_id = cn.country_id
JOIN languages l ON pn.language_id = l.id AND an.language_id = l.id AND cn.language_id = l.id
WHERE l.code_3 = 'ENG' AND MATCH(pn.name) AGAINST('+dor*' IN BOOLEAN MODE)
LIMIT 200
) resl
GROUP BY place_id, resl.name, resl.administration, resl.admin_abbr, resl.country
) t
) t2
WHERE place_id is not null
GROUP BY country, administration, admin_abbr, name
ORDER BY relevance DESC;"
);
$tables = $query->getTableNamesInQuery();
$this->assertCount(5, $tables, "Found correct number of tables");
}
}

0 comments on commit 6a7d9ae

Please sign in to comment.