Skip to content

Commit 261f526

Browse files
authored
DRILL-3962: Add Support For ROLLUP, CUBE, GROUPING SETS, GROUPING, GROUPING_ID, GROUP_ID (#3026)
1 parent ee4c023 commit 261f526

File tree

18 files changed

+1089
-173
lines changed

18 files changed

+1089
-173
lines changed

exec/java-exec/src/main/codegen/data/DateIntervalFunc.tdd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
{truncInputTypes: ["Date", "TimeStamp", "Time", "Interval", "IntervalDay", "IntervalYear"] },
2323
{truncUnits : ["Second", "Minute", "Hour", "Day", "Month", "Year", "Week", "Quarter", "Decade", "Century", "Millennium" ] },
2424
{timestampDiffUnits : ["Nanosecond", "Microsecond", "Millisecond", "Second", "Minute", "Hour", "Day", "Month", "Year", "Week", "Quarter"] },
25+
{timestampAddUnits : ["Nanosecond", "Microsecond", "Millisecond", "Second", "Minute", "Hour", "Day", "Month", "Year", "Week", "Quarter"] },
26+
{timestampAddInputTypes : ["Date", "TimeStamp", "Time"] },
2527

2628
{
2729
varCharToDate: [
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.drill.exec.expr.fn.impl;
19+
20+
import org.apache.drill.exec.expr.DrillSimpleFunc;
21+
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
22+
import org.apache.drill.exec.expr.annotations.Output;
23+
import org.apache.drill.exec.expr.annotations.Param;
24+
import org.apache.drill.exec.expr.holders.IntHolder;
25+
26+
/**
27+
* Functions for working with GROUPING SETS, ROLLUP, and CUBE.
28+
*
29+
* Note: These are internal helper functions. The actual GROUPING() and GROUPING_ID()
30+
* SQL functions need special query rewriting to work correctly with GROUPING SETS.
31+
*/
32+
public class GroupingFunctions {
33+
34+
/**
35+
* GROUPING_ID_INTERNAL - Returns the grouping ID bitmap.
36+
* This is an internal function that will be called with the $g column value.
37+
*/
38+
@FunctionTemplate(name = "grouping_id_internal",
39+
scope = FunctionTemplate.FunctionScope.SIMPLE,
40+
nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
41+
public static class GroupingIdInternal implements DrillSimpleFunc {
42+
43+
@Param IntHolder groupingId;
44+
@Output IntHolder out;
45+
46+
public void setup() {
47+
}
48+
49+
public void eval() {
50+
out.value = groupingId.value;
51+
}
52+
}
53+
54+
/**
55+
* GROUPING_INTERNAL - Returns 1 if the specified bit in the grouping ID is set, 0 otherwise.
56+
* This is an internal function that extracts a specific bit from the grouping ID.
57+
*
58+
* @param groupingId The grouping ID bitmap ($g column value)
59+
* @param bitPosition The bit position to check (0-based)
60+
*/
61+
@FunctionTemplate(name = "grouping_internal",
62+
scope = FunctionTemplate.FunctionScope.SIMPLE,
63+
nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
64+
public static class GroupingInternal implements DrillSimpleFunc {
65+
66+
@Param IntHolder groupingId;
67+
@Param IntHolder bitPosition;
68+
@Output IntHolder out;
69+
70+
public void setup() {
71+
}
72+
73+
public void eval() {
74+
// Extract the bit at bitPosition from groupingId
75+
// Bit is 1 if column is NOT in the grouping set (i.e., it's a grouping NULL)
76+
out.value = (groupingId.value >> bitPosition.value) & 1;
77+
}
78+
}
79+
}

exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/UnionAll.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,22 @@ public class UnionAll extends AbstractMultiple {
3333

3434
public static final String OPERATOR_TYPE = "UNION";
3535

36+
private final boolean isGroupingSetsExpansion;
37+
3638
@JsonCreator
37-
public UnionAll(@JsonProperty("children") List<PhysicalOperator> children) {
39+
public UnionAll(@JsonProperty("children") List<PhysicalOperator> children,
40+
@JsonProperty("isGroupingSetsExpansion") Boolean isGroupingSetsExpansion) {
3841
super(children);
42+
this.isGroupingSetsExpansion = isGroupingSetsExpansion != null ? isGroupingSetsExpansion : false;
43+
}
44+
45+
public UnionAll(List<PhysicalOperator> children) {
46+
this(children, false);
47+
}
48+
49+
@JsonProperty("isGroupingSetsExpansion")
50+
public boolean isGroupingSetsExpansion() {
51+
return isGroupingSetsExpansion;
3952
}
4053

4154
@Override
@@ -45,7 +58,7 @@ public <T, X, E extends Throwable> T accept(PhysicalVisitor<T, X, E> physicalVis
4558

4659
@Override
4760
public PhysicalOperator getNewWithChildren(List<PhysicalOperator> children) {
48-
return new UnionAll(children);
61+
return new UnionAll(children, isGroupingSetsExpansion);
4962
}
5063

5164
@Override

exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/union/UnionAllRecordBatch.java

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,7 @@
1717
*/
1818
package org.apache.drill.exec.physical.impl.union;
1919

20-
import java.util.ArrayList;
21-
import java.util.Collections;
22-
import java.util.Iterator;
23-
import java.util.List;
24-
import java.util.NoSuchElementException;
25-
import java.util.Stack;
26-
20+
import com.google.common.base.Preconditions;
2721
import org.apache.calcite.util.Pair;
2822
import org.apache.drill.common.exceptions.DrillRuntimeException;
2923
import org.apache.drill.common.expression.ErrorCollector;
@@ -59,10 +53,16 @@
5953
import org.apache.drill.exec.vector.FixedWidthVector;
6054
import org.apache.drill.exec.vector.SchemaChangeCallBack;
6155
import org.apache.drill.exec.vector.ValueVector;
62-
import com.google.common.base.Preconditions;
6356
import org.slf4j.Logger;
6457
import org.slf4j.LoggerFactory;
6558

59+
import java.util.ArrayList;
60+
import java.util.Collections;
61+
import java.util.Iterator;
62+
import java.util.List;
63+
import java.util.NoSuchElementException;
64+
import java.util.Stack;
65+
6666
public class UnionAllRecordBatch extends AbstractBinaryRecordBatch<UnionAll> {
6767
private static final Logger logger = LoggerFactory.getLogger(UnionAllRecordBatch.class);
6868

@@ -278,10 +278,14 @@ private void inferOutputFieldsBothSide(final BatchSchema leftSchema, final Batch
278278
final Iterator<MaterializedField> leftIter = leftSchema.iterator();
279279
final Iterator<MaterializedField> rightIter = rightSchema.iterator();
280280

281+
logger.debug("UnionAll inferring schema: isGroupingSetsExpansion={}", popConfig.isGroupingSetsExpansion());
281282
int index = 1;
282283
while (leftIter.hasNext() && rightIter.hasNext()) {
283284
MaterializedField leftField = leftIter.next();
284285
MaterializedField rightField = rightIter.next();
286+
logger.debug("Column {}: left='{}' type={}, right='{}' type={}",
287+
index, leftField.getName(), leftField.getType().getMinorType(),
288+
rightField.getName(), rightField.getType().getMinorType());
285289

286290
if (Types.isSameTypeAndMode(leftField.getType(), rightField.getType())) {
287291
MajorType.Builder builder = MajorType.newBuilder()
@@ -301,15 +305,7 @@ private void inferOutputFieldsBothSide(final BatchSchema leftSchema, final Batch
301305
builder.setMinorType(leftField.getType().getMinorType());
302306
builder = Types.calculateTypePrecisionAndScale(leftField.getType(), rightField.getType(), builder);
303307
} else {
304-
TypeProtos.MinorType outputMinorType = TypeCastRules.getLeastRestrictiveType(
305-
leftField.getType().getMinorType(),
306-
rightField.getType().getMinorType()
307-
);
308-
if (outputMinorType == null) {
309-
throw new DrillRuntimeException("Type mismatch between " + leftField.getType().getMinorType().toString() +
310-
" on the left side and " + rightField.getType().getMinorType().toString() +
311-
" on the right side in column " + index + " of UNION ALL");
312-
}
308+
TypeProtos.MinorType outputMinorType = resolveUnionColumnType(leftField, rightField, index);
313309
builder.setMinorType(outputMinorType);
314310
}
315311

@@ -328,6 +324,46 @@ private void inferOutputFieldsBothSide(final BatchSchema leftSchema, final Batch
328324
"Mismatch of column count should have been detected when validating sqlNode at planning";
329325
}
330326

327+
/**
328+
* Determines the output type for a UNION ALL column when combining two types.
329+
* <p>
330+
* Special handling is applied for GROUPING SETS expansion:
331+
* - Drill represents NULL columns as INT during grouping sets expansion.
332+
* - If one side is INT (likely a NULL placeholder) and the other is not, prefer the non-INT type.
333+
* <p>
334+
* For all other cases, the least restrictive type according to Drill's type cast rules is returned.
335+
*
336+
* @param leftField The type of the left column
337+
* @param rightField The type of the right column
338+
* @param index The column index (for logging)
339+
* @return The resolved output type
340+
* @throws DrillRuntimeException if types are incompatible
341+
*/
342+
private TypeProtos.MinorType resolveUnionColumnType(MaterializedField leftField,
343+
MaterializedField rightField,
344+
int index) {
345+
TypeProtos.MinorType leftType = leftField.getType().getMinorType();
346+
TypeProtos.MinorType rightType = rightField.getType().getMinorType();
347+
348+
boolean isGroupingSets = popConfig.isGroupingSetsExpansion();
349+
boolean leftIsPlaceholder = leftType == TypeProtos.MinorType.INT && rightType != TypeProtos.MinorType.INT;
350+
boolean rightIsPlaceholder = rightType == TypeProtos.MinorType.INT && leftType != TypeProtos.MinorType.INT;
351+
352+
if (isGroupingSets && (leftIsPlaceholder || rightIsPlaceholder)) {
353+
TypeProtos.MinorType outputType = leftIsPlaceholder ? rightType : leftType;
354+
logger.debug("GROUPING SETS: Preferring {} over INT for column {}", outputType, index);
355+
return outputType;
356+
}
357+
358+
TypeProtos.MinorType outputType = TypeCastRules.getLeastRestrictiveType(leftType, rightType);
359+
if (outputType == null) {
360+
throw new DrillRuntimeException("Type mismatch between " + leftType +
361+
" and " + rightType + " in column " + index + " of UNION ALL");
362+
}
363+
logger.debug("Using standard type rules: {} + {} -> {}", leftType, rightType, outputType);
364+
return outputType;
365+
}
366+
331367
private void inferOutputFieldsOneSide(final BatchSchema schema) {
332368
for (MaterializedField field : schema) {
333369
container.addOrGet(field, callBack);

exec/java-exec/src/main/java/org/apache/drill/exec/planner/PlannerPhase.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,11 @@ static RuleSet getDrillUserConfigurableLogicalRules(OptimizerRulesContext optimi
356356
Convert from Calcite Logical to Drill Logical Rules.
357357
*/
358358
RuleInstance.EXPAND_CONVERSION_RULE,
359+
360+
// Expand GROUPING SETS, ROLLUP, and CUBE BEFORE converting aggregates to Drill logical operators
361+
// This prevents multi-grouping-set aggregates from being converted to DrillAggregateRel
362+
RuleInstance.AGGREGATE_EXPAND_GROUPING_SETS_RULE,
363+
359364
DrillScanRule.INSTANCE,
360365
DrillFilterRule.INSTANCE,
361366
DrillProjectRule.INSTANCE,

exec/java-exec/src/main/java/org/apache/drill/exec/planner/RuleInstance.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.apache.calcite.rel.rules.SortRemoveRule;
4040
import org.apache.calcite.rel.rules.SubQueryRemoveRule;
4141
import org.apache.calcite.rel.rules.UnionToDistinctRule;
42+
import org.apache.drill.exec.planner.logical.DrillAggregateExpandGroupingSetsRule;
4243
import org.apache.drill.exec.planner.logical.DrillConditions;
4344
import org.apache.drill.exec.planner.logical.DrillRelFactories;
4445
import com.google.common.base.Preconditions;
@@ -107,6 +108,9 @@ public boolean matches(RelOptRuleCall call) {
107108
.withRelBuilderFactory(DrillRelFactories.LOGICAL_BUILDER)
108109
.toRule();
109110

111+
RelOptRule AGGREGATE_EXPAND_GROUPING_SETS_RULE =
112+
DrillAggregateExpandGroupingSetsRule.INSTANCE;
113+
110114
/**
111115
* Instance of the rule that works on logical joins only, and pushes to the
112116
* right.

0 commit comments

Comments
 (0)