提交 4b92a2cd authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov

Add PERCENT_RANK() and CUME_DIST() window functions

上级 d988f628
......@@ -5105,8 +5105,37 @@ Gaps in ranks are not possible.
Window functions are currently experimental in H2 and should be used with caution.
They also may require a lot of memory for large queries.
","
SELECT RANK() OVER (ORDER BY ID), * FROM TEST;
SELECT RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
SELECT DENSE_RANK() OVER (ORDER BY ID), * FROM TEST;
SELECT DENSE_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
"
"Functions (Window)","PERCENT_RANK","
PERCENT_RANK() OVER windowSpecification
","
Returns the relative rank of the current row.
The relative rank is calculated as (RANK - 1) / (NR - 1),
where RANK is the rank of the row and NR is the number of rows in the window partition that contains this row.
If the partition contains only one row, the relative rank is 0.
Window functions are currently experimental in H2 and should be used with caution.
They also may require a lot of memory for large queries.
","
SELECT PERCENT_RANK() OVER (ORDER BY ID), * FROM TEST;
SELECT PERCENT_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
"
"Functions (Window)","CUME_DIST","
CUME_DIST() OVER windowSpecification
","
Returns the cumulative distribution of the current row, that is, its relative position within the window partition.
The value is calculated as NP / NR,
where NP is the number of rows that precede the current row or have the same values in ORDER BY columns as the current row
and NR is the number of rows in the window partition that contains this row.
Window functions are currently experimental in H2 and should be used with caution.
They also may require a lot of memory for large queries.
","
SELECT CUME_DIST() OVER (ORDER BY ID), * FROM TEST;
SELECT CUME_DIST() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
"
"System Tables","Information Schema","
......
......@@ -38,7 +38,7 @@ public abstract class AbstractAggregate extends Expression {
protected Window over;
private SortOrder overOrderBySort;
protected SortOrder overOrderBySort;
private int lastGroupRowId;
......@@ -368,6 +368,15 @@ public abstract class AbstractAggregate extends Expression {
return result;
}
/**
 * Returns the aggregated value for the given aggregate data.
 *
 * @param session
 *            the session
 * @param aggregateData
 *            the aggregate data
 * @return the aggregated value
 */
protected abstract Value getAggregatedValue(Session session, Object aggregateData);
private void updateOrderedAggregate(Session session, SelectGroups groupData, int groupRowId,
......@@ -394,15 +403,31 @@ public abstract class AbstractAggregate extends Expression {
@SuppressWarnings("unchecked")
ArrayList<Value[]> orderedData = (ArrayList<Value[]>) data;
int ne = getNumExpressions();
int last = ne + over.getOrderBy().size();
int rowIdColumn = ne + over.getOrderBy().size();
Collections.sort(orderedData, overOrderBySort);
getOrderedResultLoop(session, result, orderedData, rowIdColumn);
partition.setOrderedResult(result);
}
return result.get(groupData.getCurrentGroupRowId());
}
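/**
 * Computes the result for each row of the ordered data and stores it in the
 * result map under the id of that row.
 *
 * @param session
 *            the session
 * @param result
 *            the map to append result to
 * @param ordered
 *            ordered data
 * @param rowIdColumn
 *            the index of row id value
 */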
protected void getOrderedResultLoop(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
int rowIdColumn) {
Object aggregateData = createAggregateData();
for (Value[] row : orderedData) {
for (Value[] row : ordered) {
updateFromExpressions(session, aggregateData, row);
result.put(row[last].getInt(), getAggregatedValue(session, aggregateData));
}
result.put(row[rowIdColumn].getInt(), getAggregatedValue(session, aggregateData));
}
return result.get(groupData.getCurrentGroupRowId());
}
protected StringBuilder appendTailConditions(StringBuilder builder) {
......
......@@ -5,10 +5,14 @@
*/
package org.h2.expression.aggregate;
import java.util.ArrayList;
import java.util.HashMap;
import org.h2.command.dml.Select;
import org.h2.engine.Session;
import org.h2.message.DbException;
import org.h2.value.Value;
import org.h2.value.ValueDouble;
import org.h2.value.ValueInt;
/**
......@@ -36,6 +40,16 @@ public class WindowFunction extends AbstractAggregate {
*/
DENSE_RANK,
/**
* The type for PERCENT_RANK() window function.
*/
PERCENT_RANK,
/**
* The type for CUME_DIST() window function.
*/
CUME_DIST,
;
/**
......@@ -53,6 +67,10 @@ public class WindowFunction extends AbstractAggregate {
return RANK;
case "DENSE_RANK":
return WindowFunctionType.DENSE_RANK;
case "PERCENT_RANK":
return WindowFunctionType.PERCENT_RANK;
case "CUME_DIST":
return WindowFunctionType.CUME_DIST;
default:
return null;
}
......@@ -60,27 +78,7 @@ public class WindowFunction extends AbstractAggregate {
}
private static class RowNumberData {
int number;
RowNumberData() {
}
}
private static final class RankData extends RowNumberData {
Value[] previousRow;
int previousNumber;
RankData() {
}
}
private WindowFunctionType type;
private final WindowFunctionType type;
/**
* Creates new instance of a window function.
......@@ -102,20 +100,7 @@ public class WindowFunction extends AbstractAggregate {
@Override
protected void updateAggregate(Session session, Object aggregateData) {
switch (type) {
case ROW_NUMBER:
((RowNumberData) aggregateData).number++;
break;
case RANK:
case DENSE_RANK: {
RankData data = (RankData) aggregateData;
data.number++;
data.previousNumber++;
break;
}
default:
throw DbException.throwInternalError("type=" + type);
}
throw DbException.getUnsupportedException("Window function");
}
@Override
......@@ -135,65 +120,106 @@ public class WindowFunction extends AbstractAggregate {
/**
 * Not supported by window functions: unconditionally throws the exception
 * created by {@code DbException.getUnsupportedException("Window function")}.
 */
@Override
protected void updateFromExpressions(Session session, Object aggregateData, Value[] array) {
throw DbException.getUnsupportedException("Window function");
}
/**
 * Not supported by window functions: unconditionally throws the exception
 * created by {@code DbException.getUnsupportedException("Window function")}.
 */
@Override
protected Object createAggregateData() {
throw DbException.getUnsupportedException("Window function");
}
@Override
protected void getOrderedResultLoop(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
int rowIdColumn) {
if (type == WindowFunctionType.CUME_DIST) {
getCumeDist(session, result, ordered, rowIdColumn);
return;
}
int size = ordered.size();
int number = 0;
for (int i = 0; i < size; i++) {
Value[] row = ordered.get(i);
int rowId = row[rowIdColumn].getInt();
Value v;
switch (type) {
case ROW_NUMBER:
((RowNumberData) aggregateData).number++;
v = ValueInt.get(i + 1);
break;
case RANK:
case DENSE_RANK: {
RankData data = (RankData) aggregateData;
data.number++;
Value[] previous = data.previousRow;
if (previous == null) {
data.previousNumber++;
case DENSE_RANK:
case PERCENT_RANK: {
if (i == 0) {
number = 1;
} else {
if (getOverOrderBySort().compare(previous, array) != 0) {
if (type == WindowFunctionType.RANK) {
data.previousNumber = data.number;
} else /* DENSE_RANK */ {
data.previousNumber++;
if (getOverOrderBySort().compare(ordered.get(i - 1), row) != 0) {
switch (type) {
case RANK:
case PERCENT_RANK:
number = i + 1;
break;
default: // DENSE_RANK
number++;
}
}
}
data.previousRow = array;
if (type == WindowFunctionType.PERCENT_RANK) {
int nm = number - 1;
v = nm == 0 ? ValueDouble.ZERO : ValueDouble.get((double) nm / (size - 1));
} else {
v = ValueInt.get(number);
}
break;
}
case CUME_DIST: {
int nm = number;
v = ValueDouble.get((double) nm / size);
break;
}
default:
throw DbException.throwInternalError("type=" + type);
}
result.put(rowId, v);
}
}
@Override
protected Object createAggregateData() {
switch (type) {
case ROW_NUMBER:
return new RowNumberData();
case RANK:
case DENSE_RANK:
return new RankData();
default:
throw DbException.throwInternalError("type=" + type);
/**
 * Computes the CUME_DIST value for every row of the ordered data.
 * Consecutive rows that compare as equal with {@code overOrderBySort} form
 * one group; every row of a group receives the same value, which is the
 * number of rows up to and including the end of that group divided by the
 * total number of rows.
 *
 * @param session
 *            the session (not used by this method)
 * @param result
 *            the map to put the computed values into, keyed by row id
 * @param orderedData
 *            the ordered rows
 * @param last
 *            the index of the row id value within each row
 */
private void getCumeDist(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> orderedData,
        int last) {
    final int total = orderedData.size();
    int groupStart = 0;
    while (groupStart < total) {
        Value[] first = orderedData.get(groupStart);
        // Advance to the first row that no longer compares equal to the
        // first row of the group.
        int groupEnd = groupStart + 1;
        for (; groupEnd < total; groupEnd++) {
            if (overOrderBySort.compare(first, orderedData.get(groupEnd)) != 0) {
                break;
            }
        }
        // All rows of the group share one value instance.
        ValueDouble shared = ValueDouble.get((double) groupEnd / total);
        for (Value[] peer : orderedData.subList(groupStart, groupEnd)) {
            result.put(peer[last].getInt(), shared);
        }
        groupStart = groupEnd;
    }
}
/**
 * Not supported by window functions: unconditionally throws the exception
 * created by {@code DbException.getUnsupportedException("Window function")}.
 */
@Override
protected Value getAggregatedValue(Session session, Object aggregateData) {
throw DbException.getUnsupportedException("Window function");
}
@Override
public int getType() {
switch (type) {
case ROW_NUMBER:
return ValueInt.get(((RowNumberData) aggregateData).number);
case RANK:
case DENSE_RANK:
return ValueInt.get(((RankData) aggregateData).previousNumber);
return Value.INT;
case PERCENT_RANK:
case CUME_DIST:
return Value.DOUBLE;
default:
throw DbException.throwInternalError("type=" + type);
}
}
@Override
public int getType() {
return Value.INT;
}
@Override
public int getScale() {
return 0;
......@@ -201,12 +227,32 @@ public class WindowFunction extends AbstractAggregate {
/**
 * Returns the precision of the result: the DOUBLE precision for
 * PERCENT_RANK and CUME_DIST, the INT precision for ROW_NUMBER, RANK and
 * DENSE_RANK.
 *
 * @return the precision of the result type
 */
@Override
public long getPrecision() {
    final long precision;
    switch (type) {
    case PERCENT_RANK:
    case CUME_DIST:
        precision = ValueDouble.PRECISION;
        break;
    case ROW_NUMBER:
    case RANK:
    case DENSE_RANK:
        precision = ValueInt.PRECISION;
        break;
    default:
        throw DbException.throwInternalError("type=" + type);
    }
    return precision;
}
/**
 * Returns the display size of the result: the DOUBLE display size for
 * PERCENT_RANK and CUME_DIST, the INT display size for ROW_NUMBER, RANK and
 * DENSE_RANK.
 *
 * @return the display size of the result type
 */
@Override
public int getDisplaySize() {
    final int displaySize;
    switch (type) {
    case PERCENT_RANK:
    case CUME_DIST:
        displaySize = ValueDouble.DISPLAY_SIZE;
        break;
    case ROW_NUMBER:
    case RANK:
    case DENSE_RANK:
        displaySize = ValueInt.DISPLAY_SIZE;
        break;
    default:
        throw DbException.throwInternalError("type=" + type);
    }
    return displaySize;
}
@Override
......@@ -222,6 +268,12 @@ public class WindowFunction extends AbstractAggregate {
case DENSE_RANK:
text = "DENSE_RANK";
break;
case PERCENT_RANK:
text = "PERCENT_RANK";
break;
case CUME_DIST:
text = "CUME_DIST";
break;
default:
throw DbException.throwInternalError("type=" + type);
}
......
......@@ -14,66 +14,91 @@ INSERT INTO TEST VALUES
(5, 2, 22),
(6, 3, 31),
(7, 3, 32),
(8, 3, 33);
> update count: 8
(8, 3, 33),
(9, 4, 41);
> update count: 9
SELECT *,
ROW_NUMBER() OVER () RN, RANK() OVER () RK, DENSE_RANK() OVER () DR,
ROW_NUMBER() OVER (ORDER BY ID) RNO, RANK() OVER (ORDER BY ID) RKO, DENSE_RANK() OVER (ORDER BY ID) DRO
ROW_NUMBER() OVER () RN,
RANK() OVER () RK,
DENSE_RANK() OVER () DR,
ROUND(PERCENT_RANK() OVER (), 2) PR,
ROUND(CUME_DIST() OVER (), 2) CD,
ROW_NUMBER() OVER (ORDER BY ID) RNO,
RANK() OVER (ORDER BY ID) RKO,
DENSE_RANK() OVER (ORDER BY ID) DRO,
ROUND(PERCENT_RANK() OVER (ORDER BY ID), 2) PRO,
ROUND(CUME_DIST() OVER (ORDER BY ID), 2) CDO
FROM TEST;
> ID CATEGORY VALUE RN RK DR RNO RKO DRO
> -- -------- ----- -- -- -- --- --- ---
> 1 1 11 1 1 1 1 1 1
> 2 1 12 2 1 1 2 2 2
> 3 1 13 3 1 1 3 3 3
> 4 2 21 4 1 1 4 4 4
> 5 2 22 5 1 1 5 5 5
> 6 3 31 6 1 1 6 6 6
> 7 3 32 7 1 1 7 7 7
> 8 3 33 8 1 1 8 8 8
> rows (ordered): 8
> ID CATEGORY VALUE RN RK DR PR CD RNO RKO DRO PRO CDO
> -- -------- ----- -- -- -- --- --- --- --- --- ---- ----
> 1 1 11 1 1 1 0.0 1.0 1 1 1 0.0 0.11
> 2 1 12 2 1 1 0.0 1.0 2 2 2 0.13 0.22
> 3 1 13 3 1 1 0.0 1.0 3 3 3 0.25 0.33
> 4 2 21 4 1 1 0.0 1.0 4 4 4 0.38 0.44
> 5 2 22 5 1 1 0.0 1.0 5 5 5 0.5 0.56
> 6 3 31 6 1 1 0.0 1.0 6 6 6 0.63 0.67
> 7 3 32 7 1 1 0.0 1.0 7 7 7 0.75 0.78
> 8 3 33 8 1 1 0.0 1.0 8 8 8 0.88 0.89
> 9 4 41 9 1 1 0.0 1.0 9 9 9 1.0 1.0
> rows (ordered): 9
SELECT *,
ROW_NUMBER() OVER (ORDER BY CATEGORY) RN, RANK() OVER (ORDER BY CATEGORY) RK, DENSE_RANK() OVER (ORDER BY CATEGORY) DR
ROW_NUMBER() OVER (ORDER BY CATEGORY) RN,
RANK() OVER (ORDER BY CATEGORY) RK,
DENSE_RANK() OVER (ORDER BY CATEGORY) DR,
ROUND(PERCENT_RANK() OVER (ORDER BY CATEGORY), 2) PR,
ROUND(CUME_DIST() OVER (ORDER BY CATEGORY), 2) CD
FROM TEST;
> ID CATEGORY VALUE RN RK DR
> -- -------- ----- -- -- --
> 1 1 11 1 1 1
> 2 1 12 2 1 1
> 3 1 13 3 1 1
> 4 2 21 4 4 2
> 5 2 22 5 4 2
> 6 3 31 6 6 3
> 7 3 32 7 6 3
> 8 3 33 8 6 3
> rows (ordered): 8
> ID CATEGORY VALUE RN RK DR PR CD
> -- -------- ----- -- -- -- ---- ----
> 1 1 11 1 1 1 0.0 0.33
> 2 1 12 2 1 1 0.0 0.33
> 3 1 13 3 1 1 0.0 0.33
> 4 2 21 4 4 2 0.38 0.56
> 5 2 22 5 4 2 0.38 0.56
> 6 3 31 6 6 3 0.63 0.89
> 7 3 32 7 6 3 0.63 0.89
> 8 3 33 8 6 3 0.63 0.89
> 9 4 41 9 9 4 1.0 1.0
> rows (ordered): 9
SELECT *,
ROW_NUMBER() OVER (PARTITION BY CATEGORY ORDER BY ID) RN,
RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) RK,
DENSE_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) DR
DENSE_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) DR,
ROUND(PERCENT_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), 2) PR,
ROUND(CUME_DIST() OVER (PARTITION BY CATEGORY ORDER BY ID), 2) CD
FROM TEST;
> ID CATEGORY VALUE RN RK DR
> -- -------- ----- -- -- --
> 1 1 11 1 1 1
> 2 1 12 2 2 2
> 3 1 13 3 3 3
> 4 2 21 1 1 1
> 5 2 22 2 2 2
> 6 3 31 1 1 1
> 7 3 32 2 2 2
> 8 3 33 3 3 3
> rows (ordered): 8
> ID CATEGORY VALUE RN RK DR PR CD
> -- -------- ----- -- -- -- --- ----
> 1 1 11 1 1 1 0.0 0.33
> 2 1 12 2 2 2 0.5 0.67
> 3 1 13 3 3 3 1.0 1.0
> 4 2 21 1 1 1 0.0 0.5
> 5 2 22 2 2 2 1.0 1.0
> 6 3 31 1 1 1 0.0 0.33
> 7 3 32 2 2 2 0.5 0.67
> 8 3 33 3 3 3 1.0 1.0
> 9 4 41 1 1 1 0.0 1.0
> rows (ordered): 9
SELECT
ROW_NUMBER() OVER () RN, RANK() OVER () RK, DENSE_RANK() OVER () DR
ROW_NUMBER() OVER () RN,
RANK() OVER () RK,
DENSE_RANK() OVER () DR,
PERCENT_RANK() OVER () PR,
CUME_DIST() OVER () CD
FROM TEST GROUP BY CATEGORY;
> RN RK DR
> -- -- --
> 1 1 1
> 2 1 1
> 3 1 1
> rows: 3
> RN RK DR PR CD
> -- -- -- --- ---
> 1 1 1 0.0 1.0
> 2 1 1 0.0 1.0
> 3 1 1 0.0 1.0
> 4 1 1 0.0 1.0
> rows: 4
DROP TABLE TEST;
> ok
......
......@@ -796,4 +796,4 @@ interior envelopes multilinestring multipoint packed exterior normalization awkw
xym normalizes coord setz xyzm geometrycollection multipolygon mixup rings polygons rejection finite
pointzm pointz pointm dimensionality redefine forum measures
mpg casted pzm mls constrained subtypes complains
ranks rno dro rko precede
ranks rno dro rko precede cume
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论