Skip to content

Commit 9c3afe6

Browse files
committed
Extract and split disambiguation logic for reuse in GROUP BY and HAVING
1 parent e716007 commit 9c3afe6

File tree

1 file changed

+183
-158
lines changed

1 file changed

+183
-158
lines changed

wp-includes/sqlite-ast/class-wp-sqlite-driver.php

Lines changed: 183 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -2916,137 +2916,33 @@ private function translate_qualified_identifier(
29162916
*/
29172917
private function translate_query_expression( WP_Parser_Node $node ): string {
29182918
/*
2919-
* When the ORDER BY clause is present, we need to make sure it doesn't
2920-
* cause an "ambiguous column name" error.
2921-
*
2922-
* In SQLite, all column names that exist in multiple tables used in the
2923-
* query must be fully qualified in the ORDER BY clause. In MySQL, these
2924-
* can be disambiguated in the SELECT item list.
2925-
*
2926-
* For example, with tables "t1" and "t2" both having a "name" column,
2927-
* the following query will cause an "ambiguous column name" error in
2928-
* SQLite, but not in MySQL:
2929-
*
2930-
* SELECT t1.name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY name
2931-
*
2932-
* This is because MySQL first considers the "name" column that was used
2933-
* in the SELECT list. If it is unambiguous, it will be used in ORDER BY.
2934-
*
2935-
* To address this, let's look for unqualified column references in the
2936-
* ORDER BY clause and try to qualify them using the SELECT item list.
2937-
* In other words, the above query will be rewritten as follows:
2938-
*
2939-
* SELECT t1.name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY t1.name
2940-
*
2941-
* Note that the ORDER BY column was rewritten from "name" to "t1.name".
2942-
*
2943-
* @TODO: When multi-database support is implemented, we'll also need to
2944-
* consider column references in forms like "db.table.column".
2919+
* When the ORDER BY clause is present, we need to disambiguate its item
2920+
* list so that the items don't cause an "ambiguous column name" error.
29452921
*/
29462922
$disambiguated_order_list = array();
29472923
$order_clause = $node->get_first_child_node( 'orderClause' );
29482924
if ( $order_clause ) {
29492925
$order_list = $order_clause->get_first_child_node( 'orderList' );
29502926
$select_item_list = $node->get_first_descendant_node( 'selectItemList' );
29512927

2952-
// Create a map of SELECT item column names to their qualified values.
2953-
$disambiguation_map = array();
2954-
foreach ( $select_item_list->get_child_nodes() as $select_item ) {
2955-
/*
2956-
* [GRAMMAR]
2957-
* selectItem: tableWild | (expr selectAlias?)
2958-
*/
2959-
2960-
// Skip when a "tableWild" node is used (no "expr" node).
2961-
$select_item_expr = $select_item->get_first_child_node( 'expr' );
2962-
if ( ! $select_item_expr ) {
2963-
continue;
2964-
}
2965-
2966-
// A SELECT item alias always needs to be preserved as-is.
2967-
$alias = $select_item->get_first_child_node( 'selectAlias' );
2968-
if ( $alias ) {
2969-
$alias_value = $this->translate( $alias->get_first_child_node() );
2970-
$disambiguation_map[ $alias_value ] = array( $alias_value );
2971-
continue;
2972-
}
2973-
2974-
// Skip when there is no column listed (no "columnRef" node).
2975-
$select_column_ref = $select_item_expr->get_first_descendant_node( 'columnRef' );
2976-
if ( ! $select_column_ref ) {
2977-
continue;
2978-
}
2979-
2980-
// Skip when the column reference is not qualified (no "dotIdentifier" node).
2981-
$dot_identifiers = $select_column_ref->get_descendant_nodes( 'dotIdentifier' );
2982-
if ( 0 === count( $dot_identifiers ) ) {
2983-
continue;
2984-
}
2985-
2986-
// Support also parenthesized column references (e.g. "(t.id)").
2987-
$select_item_expr = $this->unnest_parenthesized_expression( $select_item_expr );
2988-
2989-
// Consider only simple and parenthesized column references.
2990-
$expr_value = $this->translate( $select_item_expr );
2991-
$column_value = $this->translate( $select_column_ref );
2992-
if ( $expr_value !== $column_value ) {
2993-
continue;
2994-
}
2995-
2996-
// The column name is the last "dotIdentifier" node.
2997-
$key = $this->translate( end( $dot_identifiers )->get_first_child_node() );
2928+
$disambiguation_map = $this->create_select_item_disambiguation_map( $select_item_list );
29982929

2999-
$disambiguation_map[ $key ] = $disambiguation_map[ $key ] ?? array();
3000-
$disambiguation_map[ $key ][] = $column_value;
3001-
}
3002-
3003-
// For each ORDER BY item, try to find a corresponding SELECT item.
2930+
// For each "orderList" item, search for a matching SELECT item.
2931+
$disambiguated_order_list = array();
30042932
foreach ( $order_list->get_child_nodes() as $order_item ) {
30052933
/*
30062934
* [GRAMMAR]
30072935
* orderExpression: expr direction?
30082936
*/
3009-
$order_expr = $order_item->get_first_child_node( 'expr' );
3010-
$order_column_ref = $order_expr->get_first_descendant_node( 'columnRef' );
3011-
3012-
// Skip when there is no column in the ORDER BY item (no "columnRef" node),
3013-
// or when the item is already qualified (has a "dotIdentifier" node).
3014-
if (
3015-
! $order_column_ref
3016-
|| null !== $order_column_ref->get_first_descendant_node( 'dotIdentifier' )
3017-
) {
3018-
$disambiguated_order_list[] = $this->translate( $order_item );
3019-
continue;
3020-
}
3021-
3022-
// Support also parenthesized ORDER BY column references (e.g. "(id)").
3023-
$order_expr = $this->unnest_parenthesized_expression( $order_expr );
3024-
3025-
// Consider only simple and parenthesized order column references.
3026-
$order_expr_value = $this->translate( $order_expr );
3027-
$order_column_value = $this->translate( $order_column_ref );
3028-
if ( $order_expr_value !== $order_column_value ) {
3029-
$disambiguated_order_list[] = $this->translate( $order_item );
3030-
continue;
3031-
}
3032-
3033-
// Look for select items that match the column reference.
3034-
$order_column_name = $this->translate( $order_column_ref );
3035-
$select_item_matches = $disambiguation_map[ $order_column_name ] ?? array();
3036-
3037-
// When we find exactly one SELECT item, we can disambiguate the
3038-
// reference. Otherwise, fall back to the original ORDER BY item.
3039-
if ( 1 === count( $select_item_matches ) ) {
3040-
$direction = $order_item->get_first_child_node( 'direction' );
3041-
$translated_order_item = sprintf(
3042-
'%s%s',
3043-
$select_item_matches[0],
3044-
null !== $direction ? ( ' ' . $this->translate( $direction ) ) : ''
3045-
);
3046-
} else {
3047-
$translated_order_item = $this->translate( $order_item );
3048-
}
3049-
$disambiguated_order_list[] = $translated_order_item;
2937+
$order_expr = $order_item->get_first_child_node( 'expr' );
2938+
$order_direction = $order_item->get_first_child_node( 'direction' );
2939+
$disambiguated_item = $this->disambiguate_item( $disambiguation_map, $order_expr );
2940+
2941+
$disambiguated_order_list[] = sprintf(
2942+
'%s%s',
2943+
$disambiguated_item ?? $this->translate( $order_expr ),
2944+
null !== $order_direction ? ( ' ' . $this->translate( $order_direction ) ) : ''
2945+
);
30502946
}
30512947

30522948
// Translate the query expression, replacing the ORDER BY list with
@@ -3065,46 +2961,6 @@ private function translate_query_expression( WP_Parser_Node $node ): string {
30652961
return $this->translate_sequence( $node->get_children() );
30662962
}
30672963

3068-
/**
3069-
* Unnest parenthesized MySQL expression node.
3070-
*
3071-
* In MySQL, extra parentheses around simple expressions are not considered.
3072-
*
3073-
* For example, the "SELECT (((id)))" clause is equivalent to "SELECT id".
3074-
* This means that the "(((id)))" part will behave as a column name rather
3075-
* than as an expression, and the resulting column name will be just "id".
3076-
*
3077-
* @param WP_Parser_Node $node The expression AST node.
3078-
* @return WP_Parser_Node The unnested expression.
3079-
*/
3080-
private function unnest_parenthesized_expression( WP_Parser_Node $node ): WP_Parser_Node {
3081-
$children = $node->get_children();
3082-
3083-
// Descend the "expr -> boolPri -> predicate -> bitExpr -> simpleExpr" tree,
3084-
// when on each level we have only a single child node (expression nesting).
3085-
if (
3086-
1 === count( $children )
3087-
&& $children[0] instanceof WP_Parser_Node
3088-
&& in_array( $children[0]->rule_name, array( 'expr', 'boolPri', 'predicate', 'bitExpr', 'simpleExpr' ), true )
3089-
) {
3090-
$unnested = $this->unnest_parenthesized_expression( $children[0] );
3091-
return $unnested === $children[0] ? $node : $unnested;
3092-
}
3093-
3094-
// Unnest "OPEN_PAR_SYMBOL exprList CLOSE_PAR_SYMBOL" to "exprList".
3095-
if (
3096-
count( $children ) === 3
3097-
&& $children[0] instanceof WP_MySQL_Token && WP_MySQL_Lexer::OPEN_PAR_SYMBOL === $children[0]->id
3098-
&& $children[1] instanceof WP_Parser_Node && 'exprList' === $children[1]->rule_name
3099-
&& $children[2] instanceof WP_MySQL_Token && WP_MySQL_Lexer::CLOSE_PAR_SYMBOL === $children[2]->id
3100-
&& 1 === count( $children[1]->get_children() )
3101-
) {
3102-
return $this->unnest_parenthesized_expression( $children[1] );
3103-
}
3104-
3105-
return $node;
3106-
}
3107-
31082964
/**
31092965
* Translate a MySQL simple expression to SQLite.
31102966
*
@@ -3833,6 +3689,175 @@ private function translate_update_list_in_non_strict_mode( string $table_name, W
38333689
return $fragment;
38343690
}
38353691

3692+
/**
 * Strip redundant parentheses from a MySQL expression node.
 *
 * MySQL does not consider extra parentheses around simple expressions, so
 * the "SELECT (((id)))" clause is equivalent to "SELECT id": the "(((id)))"
 * part behaves as a column name rather than as an expression, and the
 * resulting column name is just "id".
 *
 * @param  WP_Parser_Node $node The expression AST node.
 * @return WP_Parser_Node       The unnested expression.
 */
private function unnest_parenthesized_expression( WP_Parser_Node $node ): WP_Parser_Node {
	$parts      = $node->get_children();
	$part_count = count( $parts );

	// A lone child on the "expr -> boolPri -> predicate -> bitExpr -> simpleExpr"
	// chain is plain expression nesting; keep descending through it.
	if ( 1 === $part_count ) {
		$inner         = $parts[0];
		$nesting_rules = array( 'expr', 'boolPri', 'predicate', 'bitExpr', 'simpleExpr' );

		if ( ! ( $inner instanceof WP_Parser_Node ) || ! in_array( $inner->rule_name, $nesting_rules, true ) ) {
			return $node;
		}

		$result = $this->unnest_parenthesized_expression( $inner );

		// When nothing was unwrapped further down, return the outermost
		// node of the nesting chain rather than its child.
		if ( $result === $inner ) {
			return $node;
		}
		return $result;
	}

	// Only the three-part "OPEN_PAR_SYMBOL exprList CLOSE_PAR_SYMBOL" shape
	// can be unwrapped from here on.
	if ( 3 !== $part_count ) {
		return $node;
	}

	$open_par  = $parts[0];
	$expr_list = $parts[1];
	$close_par = $parts[2];

	if ( ! ( $open_par instanceof WP_MySQL_Token ) || WP_MySQL_Lexer::OPEN_PAR_SYMBOL !== $open_par->id ) {
		return $node;
	}
	if ( ! ( $expr_list instanceof WP_Parser_Node ) || 'exprList' !== $expr_list->rule_name ) {
		return $node;
	}
	if ( ! ( $close_par instanceof WP_MySQL_Token ) || WP_MySQL_Lexer::CLOSE_PAR_SYMBOL !== $close_par->id ) {
		return $node;
	}

	// Parentheses are redundant only when they wrap exactly one expression.
	if ( 1 !== count( $expr_list->get_children() ) ) {
		return $node;
	}

	// Unnest "( exprList )" to "exprList" and continue from there.
	return $this->unnest_parenthesized_expression( $expr_list );
}
3731+
3732+
/**
 * Disambiguate and translate an expression with a simple or parenthesized
 * column reference for use within an ORDER BY, GROUP BY, or HAVING clause.
 *
 * In SQLite, columns that exist in multiple tables used within a query must
 * be fully qualified when used in the ORDER BY, GROUP BY, or HAVING clause.
 * In MySQL, these can be disambiguated using the SELECT item list.
 *
 * For example, when tables "t1" and "t2" both have a column called "name",
 * the following query will cause an "ambiguous column name" error in SQLite,
 * but it will succeed in MySQL, using the "t1.name" from the SELECT clause:
 *
 *   SELECT t1.name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY name
 *
 * This is because MySQL primarily considers the "name" column that was used
 * in the SELECT list - when it is unambiguous, it will be used in ORDER BY.
 *
 * To emulate this behavior in SQLite, we will search for unqualified column
 * references in the ORDER BY, GROUP BY, or HAVING item expression, and try
 * to qualify them using the SELECT item list.
 *
 * In other words, the above query will be rewritten as follows:
 *
 *   SELECT t1.name FROM t1 JOIN t2 ON t2.t1_id = t1.id ORDER BY t1.name
 *
 * Note that the ORDER BY column was rewritten from "name" to "t1.name".
 *
 * @TODO: When multi-database support is implemented, we'll also need to
 *        consider column references in forms like "db.table.column".
 *
 * @see WP_SQLite_Driver::create_select_item_disambiguation_map()
 *
 * @param  array          $disambiguation_map The SELECT item disambiguation map
 *                                            (column name => array of select items).
 * @param  WP_Parser_Node $expr               The expression AST node or subnode.
 * @return string|null                        The disambiguated and translated expression;
 *                                            null when the expression cannot be disambiguated.
 */
private function disambiguate_item( array $disambiguation_map, WP_Parser_Node $expr ) {
	// Skip when there is no column in the expression (no "columnRef" node),
	// or when the column is already qualified (has a "dotIdentifier" node).
	$column_ref = $expr->get_first_descendant_node( 'columnRef' );
	if ( ! $column_ref || $column_ref->get_first_descendant_node( 'dotIdentifier' ) ) {
		return null;
	}

	// Support also parenthesized column references (e.g. "(id)").
	$expr = $this->unnest_parenthesized_expression( $expr );

	// Consider only simple and parenthesized column references (as per MySQL):
	// the whole expression must translate to exactly the column reference.
	$expr_value  = $this->translate( $expr );
	$column_name = $this->translate( $column_ref );
	if ( $expr_value !== $column_name ) {
		return null;
	}

	// Look for SELECT items that match the column reference. The translated
	// column reference is reused as the map key instead of being translated
	// a second time.
	$select_item_matches = $disambiguation_map[ $column_name ] ?? array();

	// When we find exactly one matching SELECT list item, we can disambiguate
	// the column reference. Otherwise, the caller falls back to the original
	// expression (signalled by null).
	if ( 1 === count( $select_item_matches ) ) {
		return $select_item_matches[0];
	}
	return null;
}
3797+
3798+
/**
 * Build the SELECT item disambiguation map from a SELECT item list, for use
 * with the ORDER BY, GROUP BY, and HAVING clause disambiguation algorithm.
 *
 * Aliased SELECT items are stored under their alias. Qualified simple or
 * parenthesized column references are stored under the value of their last
 * "dotIdentifier" node. All other items are left out of the map.
 *
 * @see WP_SQLite_Driver::disambiguate_item()
 *
 * @param  WP_Parser_Node $select_item_list The "selectItemList" AST node.
 * @return array                            The SELECT item disambiguation map
 *                                          (column name => array of select items).
 */
private function create_select_item_disambiguation_map( WP_Parser_Node $select_item_list ): array {
	$map = array();

	foreach ( $select_item_list->get_child_nodes() as $item ) {
		/*
		 * [GRAMMAR]
		 * selectItem: tableWild | (expr selectAlias?)
		 */

		// No "expr" node means a "tableWild" item; there is nothing to map.
		$item_expr = $item->get_first_child_node( 'expr' );
		if ( ! $item_expr ) {
			continue;
		}

		// An aliased item is always preserved as-is; the alias entry
		// replaces whatever was recorded under the same name before.
		$alias_node = $item->get_first_child_node( 'selectAlias' );
		if ( $alias_node ) {
			$alias_name         = $this->translate( $alias_node->get_first_child_node() );
			$map[ $alias_name ] = array( $alias_name );
			continue;
		}

		// Items without any column (no "columnRef" node) are irrelevant.
		$column_ref = $item_expr->get_first_descendant_node( 'columnRef' );
		if ( ! $column_ref ) {
			continue;
		}

		// Unqualified column references (no "dotIdentifier" node) cannot
		// help with disambiguation.
		$qualifiers = $column_ref->get_descendant_nodes( 'dotIdentifier' );
		if ( count( $qualifiers ) < 1 ) {
			continue;
		}

		// Parenthesized column references (e.g. "(t.id)") count as well.
		$unwrapped_expr = $this->unnest_parenthesized_expression( $item_expr );

		// Only simple and parenthesized column references qualify (as per
		// MySQL): the whole item must translate to exactly the column reference.
		$translated_expr  = $this->translate( $unwrapped_expr );
		$qualified_column = $this->translate( $column_ref );
		if ( $translated_expr !== $qualified_column ) {
			continue;
		}

		// The column name is the last "dotIdentifier" node.
		$last_qualifier = end( $qualifiers );
		$column_name    = $this->translate( $last_qualifier->get_first_child_node() );

		if ( ! isset( $map[ $column_name ] ) ) {
			$map[ $column_name ] = array();
		}
		$map[ $column_name ][] = $qualified_column;
	}

	return $map;
}
3860+
38363861
/**
38373862
* Emulate MySQL type casting for INSERT or UPDATE value in non-strict mode.
38383863
*

0 commit comments

Comments
 (0)