Unverified commit f26ac8ba authored by Evgenij Ryazanov, committed by GitHub

Merge pull request #1706 from katzyn/aggregate

Add hypothetical set functions
......@@ -3675,6 +3675,57 @@ Aggregates are only allowed in select statements.
VAR_SAMP(X)
"
"Functions (Aggregate)","RANK aggregate","
RANK(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the rank of the hypothetical row in the specified collection of rows.
The rank of a row is the number of rows that precede it plus 1.
If two or more rows have the same values in the ORDER BY columns, they all get the rank of the first row with those values.
This means that gaps in ranks are possible.
","
SELECT RANK(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"
"Functions (Aggregate)","DENSE_RANK aggregate","
DENSE_RANK(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the dense rank of the hypothetical row in the specified collection of rows.
The dense rank of a row is the number of groups of rows with the same values in the ORDER BY columns that precede the group containing this row, plus 1.
If two or more rows have the same values in the ORDER BY columns, they get the same rank.
Gaps in ranks are not possible.
","
SELECT DENSE_RANK(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"
"Functions (Aggregate)","PERCENT_RANK aggregate","
PERCENT_RANK(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the relative rank of the hypothetical row in the specified collection of rows.
The relative rank is calculated as (RANK - 1) / (NR - 1),
where RANK is the rank of the row and NR is the total number of rows in the collection, including the hypothetical row.
","
SELECT PERCENT_RANK(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"
"Functions (Aggregate)","CUME_DIST aggregate","
CUME_DIST(value [,...])
WITHIN GROUP (ORDER BY {expression [ASC|DESC]} [,...])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the relative rank of the hypothetical row in the specified collection of rows.
The relative rank is calculated as NP / NR,
where NP is the number of rows that precede the hypothetical row or have the same values in the ORDER BY columns,
and NR is the total number of rows in the collection, including the hypothetical row.
","
SELECT CUME_DIST(5) WITHIN GROUP (ORDER BY V) FROM TEST;
"
"Functions (Aggregate)","PERCENTILE_CONT","
PERCENTILE_CONT(numeric) WITHIN GROUP (ORDER BY value [ASC|DESC])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
......
......@@ -21,6 +21,12 @@ Change Log
<h2>Next Version (unreleased)</h2>
<ul>
<li>PR #1706: Add hypothetical set functions
</li>
<li>PR #1705: Fix GROUP_CONCAT with variable separator
</li>
<li>PR #1704: Fix return type of PERCENTILE_CONT and MEDIAN
</li>
<li>PR #1701: Add PERCENTILE_CONT and PERCENTILE_DISC inverse distribution functions
</li>
<li>Issues #1297, #1697: Failure on concurrent session closure
......
......@@ -3091,16 +3091,30 @@ public class Parser {
}
break;
}
case RANK:
case DENSE_RANK:
case PERCENT_RANK:
case CUME_DIST: {
if (isToken(CLOSE_PAREN)) {
return readWindowFunction(aggregateName);
}
ArrayList<Expression> expressions = Utils.newSmallArrayList();
do {
expressions.add(readExpression());
} while (readIfMore(true));
r = readWithinGroup(aggregateType, expressions.toArray(new Expression[0]), true);
break;
}
case PERCENTILE_CONT:
case PERCENTILE_DISC: {
Expression num = readExpression();
read(CLOSE_PAREN);
r = readWithinGroup(aggregateType, new Expression[] { num });
r = readWithinGroup(aggregateType, new Expression[] { num }, false);
break;
}
case MODE: {
if (readIf(CLOSE_PAREN)) {
r = readWithinGroup(AggregateType.MODE, new Expression[0]);
r = readWithinGroup(AggregateType.MODE, new Expression[0], false);
} else {
Expression expr = readExpression();
r = new Aggregate(aggregateType, new Expression[0], currentSelect, false);
......@@ -3129,16 +3143,29 @@ public class Parser {
return r;
}
private Aggregate readWithinGroup(AggregateType aggregateType, Expression[] args) {
Aggregate r;
private Aggregate readWithinGroup(AggregateType aggregateType, Expression[] args, boolean forHypotheticalSet) {
read("WITHIN");
read(GROUP);
read(OPEN_PAREN);
read(ORDER);
read("BY");
Expression expr = readExpression();
r = new Aggregate(aggregateType, args, currentSelect, false);
readAggregateOrder(r, expr, true);
Aggregate r = new Aggregate(aggregateType, args, currentSelect, false);
if (forHypotheticalSet) {
int count = args.length;
ArrayList<SelectOrderBy> orderList = new ArrayList<>(count);
for (int i = 0; i < count; i++) {
if (i > 0) {
read(COMMA);
}
SelectOrderBy order = new SelectOrderBy();
order.expression = readExpression();
order.sortType = parseSimpleSortType();
orderList.add(order);
}
r.setOrderByList(orderList);
} else {
readAggregateOrder(r, readExpression(), true);
}
return r;
}
......
......@@ -12,6 +12,7 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.h2.api.ErrorCode;
import org.h2.command.dml.Select;
import org.h2.command.dml.SelectOrderBy;
......@@ -40,8 +41,11 @@ import org.h2.value.TypeInfo;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueBoolean;
import org.h2.value.ValueDouble;
import org.h2.value.ValueInt;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
import org.h2.value.ValueRow;
import org.h2.value.ValueString;
/**
......@@ -108,9 +112,16 @@ public class Aggregate extends AbstractAggregate {
addAggregate("HISTOGRAM", AggregateType.HISTOGRAM);
addAggregate("BIT_OR", AggregateType.BIT_OR);
addAggregate("BIT_AND", AggregateType.BIT_AND);
addAggregate("RANK", AggregateType.RANK);
addAggregate("DENSE_RANK", AggregateType.DENSE_RANK);
addAggregate("PERCENT_RANK", AggregateType.PERCENT_RANK);
addAggregate("CUME_DIST", AggregateType.CUME_DIST);
addAggregate("PERCENTILE_CONT", AggregateType.PERCENTILE_CONT);
addAggregate("PERCENTILE_DISC", AggregateType.PERCENTILE_DISC);
addAggregate("MEDIAN", AggregateType.MEDIAN);
addAggregate("ARRAY_AGG", AggregateType.ARRAY_AGG);
addAggregate("MODE", AggregateType.MODE);
// Oracle compatibility
......@@ -191,6 +202,23 @@ public class Aggregate extends AbstractAggregate {
v = updateCollecting(session, v, remembered);
}
break;
case RANK:
case DENSE_RANK:
case PERCENT_RANK:
case CUME_DIST: {
int count = args.length;
Value[] a = new Value[count];
for (int i = 0; i < count; i++) {
a[i] = remembered != null ? remembered[i] : args[i].getValue(session);
}
((AggregateDataCollecting) data).setSharedArgument(ValueRow.get(a));
a = new Value[count];
for (int i = 0; i < count; i++) {
a[i] = remembered != null ? remembered[count + i] : orderByList.get(i).expression.getValue(session);
}
v = ValueRow.get(a);
break;
}
case PERCENTILE_CONT:
case PERCENTILE_DISC:
((AggregateDataCollecting) data).setSharedArgument(v);
......@@ -382,6 +410,11 @@ public class Aggregate extends AbstractAggregate {
}
return ValueArray.get(array);
}
case RANK:
case DENSE_RANK:
case PERCENT_RANK:
case CUME_DIST:
return getHypotheticalSet(session, data);
case PERCENTILE_CONT:
case PERCENTILE_DISC: {
AggregateDataCollecting collectingData = (AggregateDataCollecting) data;
......@@ -418,6 +451,76 @@ public class Aggregate extends AbstractAggregate {
return data.getValue(session.getDatabase(), type.getValueType());
}
private Value getHypotheticalSet(Session session, AggregateData data) {
AggregateDataCollecting collectingData = (AggregateDataCollecting) data;
Value arg = collectingData.getSharedArgument();
if (arg == null) {
switch (aggregateType) {
case RANK:
case DENSE_RANK:
return ValueInt.get(1);
case PERCENT_RANK:
return ValueDouble.ZERO;
case CUME_DIST:
return ValueDouble.ONE;
default:
throw DbException.getUnsupportedException("aggregateType=" + aggregateType);
}
}
collectingData.add(session.getDatabase(), arg);
Value[] array = collectingData.getArray();
Comparator<Value> sort = orderBySort.getRowValueComparator();
Arrays.sort(array, sort);
return aggregateType == AggregateType.CUME_DIST ? getCumeDist(array, arg, sort) : getRank(array, arg, sort);
}
private Value getRank(Value[] ordered, Value arg, Comparator<Value> sort) {
int size = ordered.length;
int number = 0;
for (int i = 0; i < size; i++) {
Value row = ordered[i];
if (i == 0) {
number = 1;
} else if (sort.compare(ordered[i - 1], row) != 0) {
if (aggregateType == AggregateType.DENSE_RANK) {
number++;
} else {
number = i + 1;
}
}
Value v;
if (aggregateType == AggregateType.PERCENT_RANK) {
int nm = number - 1;
v = nm == 0 ? ValueDouble.ZERO : ValueDouble.get((double) nm / (size - 1));
} else {
v = ValueLong.get(number);
}
if (sort.compare(row, arg) == 0) {
return v;
}
}
throw DbException.throwInternalError();
}
private static Value getCumeDist(Value[] ordered, Value arg, Comparator<Value> sort) {
int size = ordered.length;
for (int start = 0; start < size;) {
Value array = ordered[start];
int end = start + 1;
while (end < size && sort.compare(array, ordered[end]) == 0) {
end++;
}
ValueDouble v = ValueDouble.get((double) end / size);
for (int i = start; i < end; i++) {
if (sort.compare(ordered[i], arg) == 0) {
return v;
}
}
start = end;
}
throw DbException.throwInternalError();
}
private Value getGroupConcat(Session session, AggregateData data) {
AggregateDataCollecting collectingData = (AggregateDataCollecting) data;
Value[] array = collectingData.getArray();
......@@ -532,7 +635,16 @@ public class Aggregate extends AbstractAggregate {
for (SelectOrderBy o : orderByList) {
o.expression = o.expression.optimize(session);
}
orderBySort = createOrder(session, orderByList, 1);
int offset;
switch (aggregateType) {
case ARRAY_AGG:
case GROUP_CONCAT:
offset = 1;
break;
default:
offset = 0;
}
orderBySort = createOrder(session, orderByList, offset);
}
switch (aggregateType) {
case GROUP_CONCAT:
......@@ -568,6 +680,14 @@ public class Aggregate extends AbstractAggregate {
case MIN:
case MAX:
break;
case RANK:
case DENSE_RANK:
type = TypeInfo.TYPE_LONG;
break;
case PERCENT_RANK:
case CUME_DIST:
type = TypeInfo.TYPE_DOUBLE;
break;
case PERCENTILE_CONT:
type = orderByList.get(0).expression.getType();
//$FALL-THROUGH$
......@@ -705,6 +825,18 @@ public class Aggregate extends AbstractAggregate {
case BIT_OR:
text = "BIT_OR";
break;
case RANK:
text = "RANK";
break;
case DENSE_RANK:
text = "DENSE_RANK";
break;
case PERCENT_RANK:
text = "PERCENT_RANK";
break;
case CUME_DIST:
text = "CUME_DIST";
break;
case PERCENTILE_CONT:
text = "PERCENTILE_CONT";
break;
......
......@@ -34,6 +34,10 @@ abstract class AggregateData {
break;
case GROUP_CONCAT:
case ARRAY_AGG:
case RANK:
case DENSE_RANK:
case PERCENT_RANK:
case CUME_DIST:
case PERCENTILE_CONT:
case PERCENTILE_DISC:
case MEDIAN:
......
......@@ -95,6 +95,26 @@ public enum AggregateType {
*/
HISTOGRAM,
/**
* The type for RANK() hypothetical set function.
*/
RANK,
/**
* The type for DENSE_RANK() hypothetical set function.
*/
DENSE_RANK,
/**
* The type for PERCENT_RANK() hypothetical set function.
*/
PERCENT_RANK,
/**
* The type for CUME_DIST() hypothetical set function.
*/
CUME_DIST,
/**
* The aggregate type for PERCENTILE_CONT(expression).
*/
......
......@@ -196,14 +196,14 @@ public class WindowFunction extends DataAnalysisOperation {
getRank(result, ordered, rowIdColumn);
break;
case CUME_DIST:
getCumeDist(session, result, ordered, rowIdColumn);
getCumeDist(result, ordered, rowIdColumn);
break;
case NTILE:
getNtile(session, result, ordered, rowIdColumn);
getNtile(result, ordered, rowIdColumn);
break;
case LEAD:
case LAG:
getLeadLag(session, result, ordered, rowIdColumn);
getLeadLag(result, ordered, rowIdColumn);
break;
case FIRST_VALUE:
case LAST_VALUE:
......@@ -211,7 +211,7 @@ public class WindowFunction extends DataAnalysisOperation {
getNth(session, result, ordered, rowIdColumn);
break;
case RATIO_TO_REPORT:
getRatioToReport(session, result, ordered, rowIdColumn);
getRatioToReport(result, ordered, rowIdColumn);
break;
default:
throw DbException.throwInternalError("type=" + type);
......@@ -243,8 +243,7 @@ public class WindowFunction extends DataAnalysisOperation {
}
}
private void getCumeDist(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> orderedData,
int last) {
private void getCumeDist(HashMap<Integer, Value> result, ArrayList<Value[]> orderedData, int rowIdColumn) {
int size = orderedData.size();
for (int start = 0; start < size;) {
Value[] array = orderedData.get(start);
......@@ -254,15 +253,14 @@ public class WindowFunction extends DataAnalysisOperation {
}
ValueDouble v = ValueDouble.get((double) end / size);
for (int i = start; i < end; i++) {
int rowId = orderedData.get(i)[last].getInt();
int rowId = orderedData.get(i)[rowIdColumn].getInt();
result.put(rowId, v);
}
start = end;
}
}
private static void getNtile(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> orderedData,
int last) {
private static void getNtile(HashMap<Integer, Value> result, ArrayList<Value[]> orderedData, int rowIdColumn) {
int size = orderedData.size();
for (int i = 0; i < size; i++) {
Value[] array = orderedData.get(i);
......@@ -279,12 +277,11 @@ public class WindowFunction extends DataAnalysisOperation {
} else {
v = i / (perTile + 1) + 1;
}
result.put(orderedData.get(i)[last].getInt(), ValueLong.get(v));
result.put(orderedData.get(i)[rowIdColumn].getInt(), ValueLong.get(v));
}
}
private void getLeadLag(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
int rowIdColumn) {
private void getLeadLag(HashMap<Integer, Value> result, ArrayList<Value[]> ordered, int rowIdColumn) {
int size = ordered.size();
int numExpressions = getNumExpressions();
int dataType = args[0].getType().getValueType();
......@@ -381,8 +378,7 @@ public class WindowFunction extends DataAnalysisOperation {
}
}
private static void getRatioToReport(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
int rowIdColumn) {
private static void getRatioToReport(HashMap<Integer, Value> result, ArrayList<Value[]> ordered, int rowIdColumn) {
int size = ordered.size();
Value value = null;
for (int i = 0; i < size; i++) {
......
......@@ -15,6 +15,7 @@ import org.h2.table.TableFilter;
import org.h2.util.Utils;
import org.h2.value.Value;
import org.h2.value.ValueNull;
import org.h2.value.ValueRow;
import java.util.ArrayList;
import java.util.Collections;
......@@ -305,6 +306,20 @@ public class SortOrder implements Comparator<Value[]> {
return sortTypes;
}
/**
* Returns comparator for row values.
*
* @return comparator for row values.
*/
public Comparator<Value> getRowValueComparator() {
return new Comparator<Value>() {
@Override
public int compare(Value o1, Value o2) {
return SortOrder.this.compare(((ValueRow) o1).getList(), ((ValueRow) o2).getList());
}
};
}
/**
* Returns a sort type bit mask with {@link #NULLS_FIRST} or {@link #NULLS_LAST}
* explicitly set, depending on {@link SysProperties#SORT_NULLS_HIGH}.
......
......@@ -165,8 +165,8 @@ public class TestScript extends TestDb {
testScript("other/" + s + ".sql");
}
for (String s : new String[] { "any", "array-agg", "avg", "bit-and", "bit-or", "count", "envelope",
"every", "group-concat", "histogram", "max", "min", "mode", "percentile", "selectivity", "stddev-pop",
"stddev-samp", "sum", "var-pop", "var-samp" }) {
"every", "group-concat", "histogram", "max", "min", "mode", "percentile", "rank", "selectivity",
"stddev-pop", "stddev-samp", "sum", "var-pop", "var-samp" }) {
testScript("functions/aggregate/" + s + ".sql");
}
for (String s : new String[] { "abs", "acos", "asin", "atan", "atan2",
......
-- Copyright 2004-2019 H2 Group. Multiple-Licensed under the MPL 2.0,
-- and the EPL 1.0 (http://h2database.com/html/license.html).
-- Initial Developer: H2 Group
--
CREATE TABLE TEST(V INT) AS VALUES 1, 2, 3, 3, 4, 5, 6;
> ok
SELECT
RANK(1) WITHIN GROUP (ORDER BY V) R1,
RANK(3) WITHIN GROUP (ORDER BY V) R3,
RANK(7) WITHIN GROUP (ORDER BY V) R7
FROM TEST;
> R1 R3 R7
> -- -- --
> 1 3 8
> rows: 1
SELECT
DENSE_RANK(1) WITHIN GROUP (ORDER BY V) R1,
DENSE_RANK(3) WITHIN GROUP (ORDER BY V) R3,
DENSE_RANK(7) WITHIN GROUP (ORDER BY V) R7
FROM TEST;
> R1 R3 R7
> -- -- --
> 1 3 7
> rows: 1
SELECT
ROUND(PERCENT_RANK(1) WITHIN GROUP (ORDER BY V), 2) R1,
ROUND(PERCENT_RANK(3) WITHIN GROUP (ORDER BY V), 2) R3,
ROUND(PERCENT_RANK(7) WITHIN GROUP (ORDER BY V), 2) R7
FROM TEST;
> R1 R3 R7
> --- ---- ---
> 0.0 0.29 1.0
> rows: 1
SELECT
ROUND(CUME_DIST(1) WITHIN GROUP (ORDER BY V), 2) R1,
ROUND(CUME_DIST(3) WITHIN GROUP (ORDER BY V), 2) R3,
ROUND(CUME_DIST(7) WITHIN GROUP (ORDER BY V), 2) R7
FROM TEST;
> R1 R3 R7
> ---- ---- ---
> 0.25 0.63 1.0
> rows: 1
SELECT
RANK(1, 1) WITHIN GROUP (ORDER BY V, V + 1) R11,
RANK(1, 2) WITHIN GROUP (ORDER BY V, V + 1) R12,
RANK(1, 3) WITHIN GROUP (ORDER BY V, V + 1) R13
FROM TEST;
> R11 R12 R13
> --- --- ---
> 1 1 2
> rows: 1
SELECT
RANK(1, 1) WITHIN GROUP (ORDER BY V, V + 1 DESC) R11,
RANK(1, 2) WITHIN GROUP (ORDER BY V, V + 1 DESC) R12,
RANK(1, 3) WITHIN GROUP (ORDER BY V, V + 1 DESC) R13
FROM TEST;
> R11 R12 R13
> --- --- ---
> 2 1 1
> rows: 1
SELECT RANK(3) WITHIN GROUP (ORDER BY V) FILTER (WHERE V <> 2) FROM TEST;
>> 2
SELECT
RANK(1) WITHIN GROUP (ORDER BY V) OVER () R1,
RANK(3) WITHIN GROUP (ORDER BY V) OVER () R3,
RANK(7) WITHIN GROUP (ORDER BY V) OVER () R7,
V
FROM TEST ORDER BY V;
> R1 R3 R7 V
> -- -- -- -
> 1 3 8 1
> 1 3 8 2
> 1 3 8 3
> 1 3 8 3
> 1 3 8 4
> 1 3 8 5
> 1 3 8 6
> rows (ordered): 7
SELECT
RANK(1) WITHIN GROUP (ORDER BY V) OVER (ORDER BY V) R1,
RANK(3) WITHIN GROUP (ORDER BY V) OVER (ORDER BY V) R3,
RANK(7) WITHIN GROUP (ORDER BY V) OVER (ORDER BY V) R7,
RANK(7) WITHIN GROUP (ORDER BY V) FILTER (WHERE V <> 2) OVER (ORDER BY V) F7,
V
FROM TEST ORDER BY V;
> R1 R3 R7 F7 V
> -- -- -- -- -
> 1 2 2 2 1
> 1 3 3 2 2
> 1 3 5 4 3
> 1 3 5 4 3
> 1 3 6 5 4
> 1 3 7 6 5
> 1 3 8 7 6
> rows (ordered): 7
SELECT
RANK(1) WITHIN GROUP (ORDER BY V) FILTER (WHERE FALSE) R,
DENSE_RANK(1) WITHIN GROUP (ORDER BY V) FILTER (WHERE FALSE) D,
PERCENT_RANK(1) WITHIN GROUP (ORDER BY V) FILTER (WHERE FALSE) P,
CUME_DIST(1) WITHIN GROUP (ORDER BY V) FILTER (WHERE FALSE) C
FROM VALUES (1) T(V);
> R D P C
> - - --- ---
> 1 1 0.0 1.0
> rows: 1
SELECT RANK(1) WITHIN GROUP (ORDER BY V, V) FROM TEST;
> exception SYNTAX_ERROR_2
SELECT RANK(1, 2) WITHIN GROUP (ORDER BY V) FROM TEST;
> exception SYNTAX_ERROR_2
SELECT RANK(V) WITHIN GROUP (ORDER BY V) FROM TEST;
> exception INVALID_VALUE_2
DROP TABLE TEST;
> ok
......@@ -806,4 +806,4 @@ econd irst bcef ordinality nord unnest
analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan
corrupts splitted disruption unintentional octets preconditions predicates subq objectweb insn opcodes
preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions subclause ftbl rgr
presorted inclusion contexts aax mwd percentile cont interpolate mwa
presorted inclusion contexts aax mwd percentile cont interpolate mwa hypothetical