Unverified 提交 54d25b13 authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov 提交者: GitHub

Merge pull request #1660 from katzyn/window

Optimize window aggregates with AND UNBOUNDED FOLLOWING and no exclusions
......@@ -3542,6 +3542,8 @@ Concatenates strings with a separator.
The default separator is a ',' (without space).
This method returns a string.
If no rows are selected, the result is NULL.
If ORDER BY is not specified order of strings is not determined.
Note that when this aggregate is used with OVER clause its ORDER BY subclause does not enforce exact order of strings.
Aggregates are only allowed in select statements.
","
GROUP_CONCAT(NAME ORDER BY ID SEPARATOR ', ')
......@@ -3555,6 +3557,8 @@ ARRAY_AGG ( [ DISTINCT|ALL ] string
Aggregate the value into an array.
This method returns an array.
If no rows are selected, the result is NULL.
If ORDER BY is not specified order of values is not determined.
Note that when this aggregate is used with OVER clause its ORDER BY subclause does not enforce exact order of values.
Aggregates are only allowed in select statements.
","
ARRAY_AGG(NAME ORDER BY ID)
......
......@@ -21,6 +21,8 @@ Change Log
<h2>Next Version (unreleased)</h2>
<ul>
<li>PR #1660: Optimize window aggregates with AND UNBOUNDED FOLLOWING and no exclusions
</li>
<li>PR #1656: Optimize window aggregates with ORDER BY + UNBOUNDED PRECEDING + no exclusions
</li>
<li>Issue #1654: OOM in TestMemoryUsage, in big mode
......
......@@ -95,19 +95,25 @@ public abstract class AbstractAggregate extends DataAnalysisOperation {
boolean grouped = frame == null
|| frame.getUnits() != WindowFrameUnits.ROWS && frame.getExclusion().isGroupOrNoOthers();
if (frame == null) {
assert over.getOrderBy() != null;
aggregateFastPartition(session, result, ordered, rowIdColumn, grouped);
return;
}
if (frame.getStarting().getType() == WindowFrameBoundType.UNBOUNDED_PRECEDING
&& frame.getExclusion() == WindowFrameExclusion.EXCLUDE_NO_OTHERS) {
if (frame.getExclusion() == WindowFrameExclusion.EXCLUDE_NO_OTHERS) {
WindowFrameBound following = frame.getFollowing();
if (following != null && following.getType() == WindowFrameBoundType.UNBOUNDED_FOLLOWING) {
aggregateWholePartition(session, result, ordered, rowIdColumn);
} else {
aggregateFastPartition(session, result, ordered, rowIdColumn, grouped);
boolean unboundedFollowing = following != null
&& following.getType() == WindowFrameBoundType.UNBOUNDED_FOLLOWING;
if (frame.getStarting().getType() == WindowFrameBoundType.UNBOUNDED_PRECEDING) {
if (unboundedFollowing) {
aggregateWholePartition(session, result, ordered, rowIdColumn);
} else {
aggregateFastPartition(session, result, ordered, rowIdColumn, grouped);
}
return;
}
if (unboundedFollowing) {
aggregateFastPartitionInReverse(session, result, ordered, rowIdColumn, grouped);
return;
}
return;
}
// All other types of frames (slow)
int size = ordered.size();
......@@ -117,7 +123,8 @@ public abstract class AbstractAggregate extends DataAnalysisOperation {
false); iter.hasNext();) {
updateFromExpressions(session, aggregateData, iter.next());
}
i = processGroup(session, result, ordered, rowIdColumn, i, size, aggregateData, grouped);
Value r = getAggregatedValue(session, aggregateData);
i = processGroup(session, result, r, ordered, rowIdColumn, i, size, aggregateData, grouped);
}
}
......@@ -126,6 +133,7 @@ public abstract class AbstractAggregate extends DataAnalysisOperation {
Object aggregateData = createAggregateData();
int size = ordered.size();
int lastIncludedRow = -1;
Value r = null;
for (int i = 0; i < size;) {
int newLast = WindowFrame.getEndIndex(over, session, ordered, getOverOrderBySort(), i);
assert newLast >= lastIncludedRow;
......@@ -134,15 +142,42 @@ public abstract class AbstractAggregate extends DataAnalysisOperation {
updateFromExpressions(session, aggregateData, ordered.get(j));
}
lastIncludedRow = newLast;
r = getAggregatedValue(session, aggregateData);
} else if (r == null) {
r = getAggregatedValue(session, aggregateData);
}
i = processGroup(session, result, r, ordered, rowIdColumn, i, size, aggregateData, grouped);
}
}
private void aggregateFastPartitionInReverse(Session session, HashMap<Integer, Value> result,
ArrayList<Value[]> ordered, int rowIdColumn, boolean grouped) {
Object aggregateData = createAggregateData();
int firstIncludedRow = ordered.size();
Value r = null;
for (int i = firstIncludedRow - 1; i >= 0;) {
int newLast = over.getWindowFrame().getStartIndex(session, ordered, getOverOrderBySort(), i);
assert newLast <= firstIncludedRow;
if (newLast < firstIncludedRow) {
for (int j = firstIncludedRow - 1; j >= newLast; j--) {
updateFromExpressions(session, aggregateData, ordered.get(j));
}
firstIncludedRow = newLast;
r = getAggregatedValue(session, aggregateData);
} else if (r == null) {
r = getAggregatedValue(session, aggregateData);
}
i = processGroup(session, result, ordered, rowIdColumn, i, size, aggregateData, grouped);
Value[] lastRowInGroup = ordered.get(i), currentRowInGroup = lastRowInGroup;
do {
result.put(currentRowInGroup[rowIdColumn].getInt(), r);
} while (--i >= 0 && grouped
&& overOrderBySort.compare(lastRowInGroup, currentRowInGroup = ordered.get(i)) == 0);
}
}
private int processGroup(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
private int processGroup(Session session, HashMap<Integer, Value> result, Value r, ArrayList<Value[]> ordered,
int rowIdColumn, int i, int size, Object aggregateData, boolean grouped) {
Value[] firstRowInGroup = ordered.get(i), currentRowInGroup = firstRowInGroup;
Value r = getAggregatedValue(session, aggregateData);
do {
result.put(currentRowInGroup[rowIdColumn].getInt(), r);
} while (++i < size && grouped
......@@ -178,8 +213,8 @@ public abstract class AbstractAggregate extends DataAnalysisOperation {
@Override
protected void updateAggregate(Session session, SelectGroups groupData, int groupRowId) {
ArrayList<SelectOrderBy> orderBy;
if (filterCondition == null || filterCondition.getBooleanValue(session)) {
ArrayList<SelectOrderBy> orderBy;
if (over != null) {
if ((orderBy = over.getOrderBy()) != null) {
updateOrderedAggregate(session, groupData, groupRowId, orderBy);
......@@ -189,6 +224,8 @@ public abstract class AbstractAggregate extends DataAnalysisOperation {
} else {
updateAggregate(session, getGroupData(groupData, false));
}
} else if (over != null && (orderBy = over.getOrderBy()) != null) {
updateOrderedAggregate(session, groupData, groupRowId, orderBy);
}
}
......
......@@ -346,6 +346,9 @@ public class Aggregate extends AbstractAggregate {
if (orderByList != null) {
n += orderByList.size();
}
if (filterCondition != null) {
n++;
}
return n;
}
......@@ -360,13 +363,18 @@ public class Aggregate extends AbstractAggregate {
array[offset++] = o.expression.getValue(session);
}
}
if (filterCondition != null) {
array[offset] = ValueBoolean.get(filterCondition.getBooleanValue(session));
}
}
@Override
protected void updateFromExpressions(Session session, Object aggregateData, Value[] array) {
AggregateData data = (AggregateData) aggregateData;
Value v = on == null ? null : array[0];
updateData(session, data, v, array);
if (filterCondition == null || array[getNumExpressions() - 1].getBoolean()) {
AggregateData data = (AggregateData) aggregateData;
Value v = on == null ? null : array[0];
updateData(session, data, v, array);
}
}
@Override
......
......@@ -20,6 +20,7 @@ import org.h2.table.TableFilter;
import org.h2.value.DataType;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueBoolean;
import org.h2.value.ValueNull;
/**
......@@ -232,19 +233,29 @@ public class JavaAggregate extends AbstractAggregate {
@Override
protected int getNumExpressions() {
return args.length;
int n = args.length;
if (filterCondition != null) {
n++;
}
return n;
}
@Override
protected void rememberExpressions(Session session, Value[] array) {
for (int i = 0; i < args.length; i++) {
int length = args.length;
for (int i = 0; i < length; i++) {
array[i] = args[i].getValue(session);
}
if (filterCondition != null) {
array[length] = ValueBoolean.get(filterCondition.getBooleanValue(session));
}
}
@Override
protected void updateFromExpressions(Session session, Object aggregateData, Value[] array) {
updateData(session, aggregateData, array);
if (filterCondition == null || array[getNumExpressions() - 1].getBoolean()) {
updateData(session, aggregateData, array);
}
}
@Override
......
......@@ -345,7 +345,11 @@ public abstract class DataAnalysisOperation extends Expression {
data = partition.getData();
}
if (forOrderBy || !isAggregate()) {
return getOrderedResult(session, groupData, partition, data);
Value result = getOrderedResult(session, groupData, partition, data);
if (result == null) {
return getAggregatedValue(session, null);
}
return result;
}
// Window aggregate without ORDER BY clause in window specification
Value result = partition.getResult();
......@@ -411,7 +415,8 @@ public abstract class DataAnalysisOperation extends Expression {
}
/**
* Returns result of this window function or window aggregate.
* Returns result of this window function or window aggregate. This method
* may not be called on window aggregate without window order clause.
*
* @param session
* the session
......
......@@ -424,6 +424,32 @@ public final class WindowFrame {
: plainIterator(orderedRows, startIndex, endIndex, reverse);
}
/**
* Returns start index of this frame,
*
* @param session
* the session
* @param orderedRows
* ordered rows
* @param sortOrder
* sort order
* @param currentRow
* index of the current row
* @return start index
* @throws UnsupportedOperationException
* if exclusion clause is not EXCLUDE NO OTHERS
*/
public int getStartIndex(Session session, ArrayList<Value[]> orderedRows, SortOrder sortOrder, int currentRow) {
if (exclusion != WindowFrameExclusion.EXCLUDE_NO_OTHERS) {
throw new UnsupportedOperationException();
}
int startIndex = getIndex(session, orderedRows, sortOrder, currentRow, starting, false);
if (startIndex < 0) {
startIndex = 0;
}
return startIndex;
}
/**
* Returns end index of this frame,
*
......
......@@ -107,15 +107,32 @@ SELECT ARRAY_AGG(ID ORDER BY ID) OVER (PARTITION BY NAME), NAME FROM TEST;
> [4, 5, 6] c
> rows: 6
SELECT ARRAY_AGG(ID ORDER BY ID) FILTER (WHERE ID < 3 OR ID > 4) OVER (PARTITION BY NAME), NAME FROM TEST ORDER BY NAME;
> ARRAY_AGG(ID ORDER BY ID) FILTER (WHERE ((ID < 3) OR (ID > 4))) OVER (PARTITION BY NAME) NAME
> ---------------------------------------------------------------------------------------- ----
> [1, 2] a
> [1, 2] a
> null b
> [5, 6] c
> [5, 6] c
> [5, 6] c
SELECT
ARRAY_AGG(ID ORDER BY ID) FILTER (WHERE ID < 3 OR ID > 4) OVER (PARTITION BY NAME) A,
ARRAY_AGG(ID ORDER BY ID) FILTER (WHERE ID < 3 OR ID > 4) OVER (PARTITION BY NAME ORDER BY ID) AO,
ID, NAME FROM TEST ORDER BY ID;
> A AO ID NAME
> ------ ------ -- ----
> [1, 2] [1] 1 a
> [1, 2] [1, 2] 2 a
> null null 3 b
> [5, 6] null 4 c
> [5, 6] [5] 5 c
> [5, 6] [5, 6] 6 c
> rows (ordered): 6
SELECT
ARRAY_AGG(ID ORDER BY ID) FILTER (WHERE ID < 3 OR ID > 4)
OVER (ORDER BY ID ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) A,
ID FROM TEST ORDER BY ID;
> A ID
> ------ --
> [1, 2] 1
> [1, 2] 2
> [2] 3
> [5] 4
> [5, 6] 5
> [5, 6] 6
> rows (ordered): 6
SELECT ARRAY_AGG(SUM(ID)) OVER () FROM TEST;
......@@ -311,6 +328,22 @@ SELECT *,
> 8 9 [7, 8] [9, 9] [4, 5, 6, 7, 8] [8, 8, 8, 9, 9] [9, 9, 8, 8, 8] [4, 5, 6, 7, 8] [8, 8, 8, 9, 9]
> rows: 8
SELECT *,
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE RANGE BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) A1,
ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) A2
FROM TEST;
> ID VALUE A1 A2
> -- ----- ------------------------ ------------------------
> 1 1 [1, 2, 3, 4, 5, 6, 7, 8] [1, 2]
> 2 1 [1, 2, 3, 4, 5, 6, 7, 8] [1, 2]
> 3 5 [3, 4, 5, 6, 7, 8] [1, 2, 3]
> 4 8 [4, 5, 6, 7, 8] [1, 2, 3, 4, 5, 6, 7, 8]
> 5 8 [4, 5, 6, 7, 8] [1, 2, 3, 4, 5, 6, 7, 8]
> 6 8 [4, 5, 6, 7, 8] [1, 2, 3, 4, 5, 6, 7, 8]
> 7 9 [4, 5, 6, 7, 8] [1, 2, 3, 4, 5, 6, 7, 8]
> 8 9 [4, 5, 6, 7, 8] [1, 2, 3, 4, 5, 6, 7, 8]
> rows: 8
SELECT *, ARRAY_AGG(ID) OVER (ORDER BY VALUE ROWS -1 PRECEDING) FROM TEST;
> exception INVALID_VALUE_2
......@@ -371,7 +404,7 @@ SELECT *,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) U_P,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN 2 PRECEDING AND 1 PRECEDING) P,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) F,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) U_F
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) U_F
FROM TEST;
> ID VALUE U_P P F U_F
> -- ----- ------------------ ------------ --------------- ------------------
......@@ -451,11 +484,11 @@ SELECT ID, VALUE, ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN 2 PRECEDING A
SELECT ID, VALUE,
ARRAY_AGG(ID) OVER (ORDER BY VALUE ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) CP,
ARRAY_AGG(ID) OVER (ORDER BY VALUE ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) CF,
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) CF,
ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) RP,
ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) RF,
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) RF,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) GP,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) GF
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) GF
FROM TEST;
> ID VALUE CP CF RP RF GP GF
> -- ----- ------------------------ ------------------------ ------------------------ ------------------------ ------------------------ ------------------------
......@@ -545,11 +578,11 @@ SELECT ID, VALUE, ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN 1 FOLLOWING A
SELECT ID, VALUE,
ARRAY_AGG(ID) OVER (ORDER BY VALUE ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) CP,
ARRAY_AGG(ID) OVER (ORDER BY VALUE ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) CF,
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) CF,
ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) RP,
ARRAY_AGG(ID) OVER (ORDER BY VALUE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) RF,
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) RF,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) GP,
ARRAY_AGG(ID) OVER (ORDER BY VALUE GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) GF
ARRAY_AGG(ID ORDER BY ID) OVER (ORDER BY VALUE GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) GF
FROM TEST;
> ID VALUE CP CF RP RF GP GF
> -- ----- ------------------------ ------------------------ ------------------------ ------------------------ ------------------------ ------------------------
......
......@@ -805,4 +805,4 @@ queryparser tokenized freeze factorings recompilation unenclosed rfe dsync
econd irst bcef ordinality nord unnest
analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan
corrupts splitted disruption unintentional octets preconditions predicates subq objectweb insn opcodes
preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions
preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions subclause
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论