提交 4b92a2cd authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov

Add PERCENT_RANK() and CUME_DIST() window functions

上级 d988f628
...@@ -5105,8 +5105,37 @@ Gaps in ranks are not possible. ...@@ -5105,8 +5105,37 @@ Gaps in ranks are not possible.
Window functions are currently experimental in H2 and should be used with caution. Window functions are currently experimental in H2 and should be used with caution.
They also may require a lot of memory for large queries. They also may require a lot of memory for large queries.
"," ","
SELECT RANK() OVER (ORDER BY ID), * FROM TEST; SELECT DENSE_RANK() OVER (ORDER BY ID), * FROM TEST;
SELECT RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST; SELECT DENSE_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
"
"Functions (Window)","PERCENT_RANK","
PERCENT_RANK() OVER windowSpecification
","
Returns the relative rank of the current row.
The relative rank is calculated as (RANK - 1) / (NR - 1),
where RANK is the rank of the row and NR is the number of rows in the window partition that contains this row.
If the partition contains only one row, the result is 0.
Window functions are currently experimental in H2 and should be used with caution.
They also may require a lot of memory for large queries.
","
SELECT PERCENT_RANK() OVER (ORDER BY ID), * FROM TEST;
SELECT PERCENT_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
"
"Functions (Window)","CUME_DIST","
CUME_DIST() OVER windowSpecification
","
Returns the relative rank of the current row.
The relative rank is calculated as NP / NR,
where NP is the number of rows that precede the current row or have the same values in ORDER BY columns
and NR is the number of rows in the window partition that contains this row.
Window functions are currently experimental in H2 and should be used with caution.
They also may require a lot of memory for large queries.
","
SELECT CUME_DIST() OVER (ORDER BY ID), * FROM TEST;
SELECT CUME_DIST() OVER (PARTITION BY CATEGORY ORDER BY ID), * FROM TEST;
" "
"System Tables","Information Schema"," "System Tables","Information Schema","
......
...@@ -38,7 +38,7 @@ public abstract class AbstractAggregate extends Expression { ...@@ -38,7 +38,7 @@ public abstract class AbstractAggregate extends Expression {
protected Window over; protected Window over;
private SortOrder overOrderBySort; protected SortOrder overOrderBySort;
private int lastGroupRowId; private int lastGroupRowId;
...@@ -368,6 +368,15 @@ public abstract class AbstractAggregate extends Expression { ...@@ -368,6 +368,15 @@ public abstract class AbstractAggregate extends Expression {
return result; return result;
} }
/**
* Returns aggregated value.
*
* @param session
* the session
* @param aggregateData
* the aggregate data
* @return aggregated value.
*/
protected abstract Value getAggregatedValue(Session session, Object aggregateData); protected abstract Value getAggregatedValue(Session session, Object aggregateData);
private void updateOrderedAggregate(Session session, SelectGroups groupData, int groupRowId, private void updateOrderedAggregate(Session session, SelectGroups groupData, int groupRowId,
...@@ -394,15 +403,31 @@ public abstract class AbstractAggregate extends Expression { ...@@ -394,15 +403,31 @@ public abstract class AbstractAggregate extends Expression {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
ArrayList<Value[]> orderedData = (ArrayList<Value[]>) data; ArrayList<Value[]> orderedData = (ArrayList<Value[]>) data;
int ne = getNumExpressions(); int ne = getNumExpressions();
int last = ne + over.getOrderBy().size(); int rowIdColumn = ne + over.getOrderBy().size();
Collections.sort(orderedData, overOrderBySort); Collections.sort(orderedData, overOrderBySort);
getOrderedResultLoop(session, result, orderedData, rowIdColumn);
partition.setOrderedResult(result);
}
return result.get(groupData.getCurrentGroupRowId());
}
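/**
* Computes the value for each row of the ordered data and stores it in the
* result map under the id of that row.
*
* @param session
* the session
* @param result
* the map to append result to
* @param ordered
* ordered data
* @param rowIdColumn
* the index of row id value
*/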
protected void getOrderedResultLoop(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
int rowIdColumn) {
Object aggregateData = createAggregateData(); Object aggregateData = createAggregateData();
for (Value[] row : orderedData) { for (Value[] row : ordered) {
updateFromExpressions(session, aggregateData, row); updateFromExpressions(session, aggregateData, row);
result.put(row[last].getInt(), getAggregatedValue(session, aggregateData)); result.put(row[rowIdColumn].getInt(), getAggregatedValue(session, aggregateData));
}
} }
return result.get(groupData.getCurrentGroupRowId());
} }
protected StringBuilder appendTailConditions(StringBuilder builder) { protected StringBuilder appendTailConditions(StringBuilder builder) {
......
...@@ -5,10 +5,14 @@ ...@@ -5,10 +5,14 @@
*/ */
package org.h2.expression.aggregate; package org.h2.expression.aggregate;
import java.util.ArrayList;
import java.util.HashMap;
import org.h2.command.dml.Select; import org.h2.command.dml.Select;
import org.h2.engine.Session; import org.h2.engine.Session;
import org.h2.message.DbException; import org.h2.message.DbException;
import org.h2.value.Value; import org.h2.value.Value;
import org.h2.value.ValueDouble;
import org.h2.value.ValueInt; import org.h2.value.ValueInt;
/** /**
...@@ -36,6 +40,16 @@ public class WindowFunction extends AbstractAggregate { ...@@ -36,6 +40,16 @@ public class WindowFunction extends AbstractAggregate {
*/ */
DENSE_RANK, DENSE_RANK,
/**
* The type for PERCENT_RANK() window function.
*/
PERCENT_RANK,
/**
* The type for CUME_DIST() window function.
*/
CUME_DIST,
; ;
/** /**
...@@ -53,6 +67,10 @@ public class WindowFunction extends AbstractAggregate { ...@@ -53,6 +67,10 @@ public class WindowFunction extends AbstractAggregate {
return RANK; return RANK;
case "DENSE_RANK": case "DENSE_RANK":
return WindowFunctionType.DENSE_RANK; return WindowFunctionType.DENSE_RANK;
case "PERCENT_RANK":
return WindowFunctionType.PERCENT_RANK;
case "CUME_DIST":
return WindowFunctionType.CUME_DIST;
default: default:
return null; return null;
} }
...@@ -60,27 +78,7 @@ public class WindowFunction extends AbstractAggregate { ...@@ -60,27 +78,7 @@ public class WindowFunction extends AbstractAggregate {
} }
private static class RowNumberData { private final WindowFunctionType type;
int number;
RowNumberData() {
}
}
private static final class RankData extends RowNumberData {
Value[] previousRow;
int previousNumber;
RankData() {
}
}
private WindowFunctionType type;
/** /**
* Creates new instance of a window function. * Creates new instance of a window function.
...@@ -102,20 +100,7 @@ public class WindowFunction extends AbstractAggregate { ...@@ -102,20 +100,7 @@ public class WindowFunction extends AbstractAggregate {
@Override @Override
protected void updateAggregate(Session session, Object aggregateData) { protected void updateAggregate(Session session, Object aggregateData) {
switch (type) { throw DbException.getUnsupportedException("Window function");
case ROW_NUMBER:
((RowNumberData) aggregateData).number++;
break;
case RANK:
case DENSE_RANK: {
RankData data = (RankData) aggregateData;
data.number++;
data.previousNumber++;
break;
}
default:
throw DbException.throwInternalError("type=" + type);
}
} }
@Override @Override
...@@ -135,65 +120,106 @@ public class WindowFunction extends AbstractAggregate { ...@@ -135,65 +120,106 @@ public class WindowFunction extends AbstractAggregate {
@Override @Override
protected void updateFromExpressions(Session session, Object aggregateData, Value[] array) { protected void updateFromExpressions(Session session, Object aggregateData, Value[] array) {
throw DbException.getUnsupportedException("Window function");
}
@Override
protected Object createAggregateData() {
throw DbException.getUnsupportedException("Window function");
}
@Override
protected void getOrderedResultLoop(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> ordered,
int rowIdColumn) {
if (type == WindowFunctionType.CUME_DIST) {
getCumeDist(session, result, ordered, rowIdColumn);
return;
}
int size = ordered.size();
int number = 0;
for (int i = 0; i < size; i++) {
Value[] row = ordered.get(i);
int rowId = row[rowIdColumn].getInt();
Value v;
switch (type) { switch (type) {
case ROW_NUMBER: case ROW_NUMBER:
((RowNumberData) aggregateData).number++; v = ValueInt.get(i + 1);
break; break;
case RANK: case RANK:
case DENSE_RANK: { case DENSE_RANK:
RankData data = (RankData) aggregateData; case PERCENT_RANK: {
data.number++; if (i == 0) {
Value[] previous = data.previousRow; number = 1;
if (previous == null) {
data.previousNumber++;
} else { } else {
if (getOverOrderBySort().compare(previous, array) != 0) { if (getOverOrderBySort().compare(ordered.get(i - 1), row) != 0) {
if (type == WindowFunctionType.RANK) { switch (type) {
data.previousNumber = data.number; case RANK:
} else /* DENSE_RANK */ { case PERCENT_RANK:
data.previousNumber++; number = i + 1;
break;
default: // DENSE_RANK
number++;
} }
} }
} }
data.previousRow = array; if (type == WindowFunctionType.PERCENT_RANK) {
int nm = number - 1;
v = nm == 0 ? ValueDouble.ZERO : ValueDouble.get((double) nm / (size - 1));
} else {
v = ValueInt.get(number);
}
break;
}
case CUME_DIST: {
int nm = number;
v = ValueDouble.get((double) nm / size);
break; break;
} }
default: default:
throw DbException.throwInternalError("type=" + type); throw DbException.throwInternalError("type=" + type);
} }
result.put(rowId, v);
}
} }
@Override private void getCumeDist(Session session, HashMap<Integer, Value> result, ArrayList<Value[]> orderedData,
protected Object createAggregateData() { int last) {
switch (type) { int size = orderedData.size();
case ROW_NUMBER: for (int start = 0; start < size;) {
return new RowNumberData(); Value[] array = orderedData.get(start);
case RANK: int end = start + 1;
case DENSE_RANK: while (end < size && overOrderBySort.compare(array, orderedData.get(end)) == 0) {
return new RankData(); end++;
default: }
throw DbException.throwInternalError("type=" + type); ValueDouble v = ValueDouble.get((double) end / size);
for (int i = start; i < end; i++) {
int rowId = orderedData.get(i)[last].getInt();
result.put(rowId, v);
}
start = end;
} }
} }
@Override @Override
protected Value getAggregatedValue(Session session, Object aggregateData) { protected Value getAggregatedValue(Session session, Object aggregateData) {
throw DbException.getUnsupportedException("Window function");
}
@Override
public int getType() {
switch (type) { switch (type) {
case ROW_NUMBER: case ROW_NUMBER:
return ValueInt.get(((RowNumberData) aggregateData).number);
case RANK: case RANK:
case DENSE_RANK: case DENSE_RANK:
return ValueInt.get(((RankData) aggregateData).previousNumber); return Value.INT;
case PERCENT_RANK:
case CUME_DIST:
return Value.DOUBLE;
default: default:
throw DbException.throwInternalError("type=" + type); throw DbException.throwInternalError("type=" + type);
} }
} }
@Override
public int getType() {
return Value.INT;
}
@Override @Override
public int getScale() { public int getScale() {
return 0; return 0;
...@@ -201,12 +227,32 @@ public class WindowFunction extends AbstractAggregate { ...@@ -201,12 +227,32 @@ public class WindowFunction extends AbstractAggregate {
@Override @Override
public long getPrecision() { public long getPrecision() {
switch (type) {
case ROW_NUMBER:
case RANK:
case DENSE_RANK:
return ValueInt.PRECISION; return ValueInt.PRECISION;
case PERCENT_RANK:
case CUME_DIST:
return ValueDouble.PRECISION;
default:
throw DbException.throwInternalError("type=" + type);
}
} }
@Override @Override
public int getDisplaySize() { public int getDisplaySize() {
switch (type) {
case ROW_NUMBER:
case RANK:
case DENSE_RANK:
return ValueInt.DISPLAY_SIZE; return ValueInt.DISPLAY_SIZE;
case PERCENT_RANK:
case CUME_DIST:
return ValueDouble.DISPLAY_SIZE;
default:
throw DbException.throwInternalError("type=" + type);
}
} }
@Override @Override
...@@ -222,6 +268,12 @@ public class WindowFunction extends AbstractAggregate { ...@@ -222,6 +268,12 @@ public class WindowFunction extends AbstractAggregate {
case DENSE_RANK: case DENSE_RANK:
text = "DENSE_RANK"; text = "DENSE_RANK";
break; break;
case PERCENT_RANK:
text = "PERCENT_RANK";
break;
case CUME_DIST:
text = "CUME_DIST";
break;
default: default:
throw DbException.throwInternalError("type=" + type); throw DbException.throwInternalError("type=" + type);
} }
......
...@@ -14,66 +14,91 @@ INSERT INTO TEST VALUES ...@@ -14,66 +14,91 @@ INSERT INTO TEST VALUES
(5, 2, 22), (5, 2, 22),
(6, 3, 31), (6, 3, 31),
(7, 3, 32), (7, 3, 32),
(8, 3, 33); (8, 3, 33),
> update count: 8 (9, 4, 41);
> update count: 9
SELECT *, SELECT *,
ROW_NUMBER() OVER () RN, RANK() OVER () RK, DENSE_RANK() OVER () DR, ROW_NUMBER() OVER () RN,
ROW_NUMBER() OVER (ORDER BY ID) RNO, RANK() OVER (ORDER BY ID) RKO, DENSE_RANK() OVER (ORDER BY ID) DRO RANK() OVER () RK,
DENSE_RANK() OVER () DR,
ROUND(PERCENT_RANK() OVER (), 2) PR,
ROUND(CUME_DIST() OVER (), 2) CD,
ROW_NUMBER() OVER (ORDER BY ID) RNO,
RANK() OVER (ORDER BY ID) RKO,
DENSE_RANK() OVER (ORDER BY ID) DRO,
ROUND(PERCENT_RANK() OVER (ORDER BY ID), 2) PRO,
ROUND(CUME_DIST() OVER (ORDER BY ID), 2) CDO
FROM TEST; FROM TEST;
> ID CATEGORY VALUE RN RK DR RNO RKO DRO > ID CATEGORY VALUE RN RK DR PR CD RNO RKO DRO PRO CDO
> -- -------- ----- -- -- -- --- --- --- > -- -------- ----- -- -- -- --- --- --- --- --- ---- ----
> 1 1 11 1 1 1 1 1 1 > 1 1 11 1 1 1 0.0 1.0 1 1 1 0.0 0.11
> 2 1 12 2 1 1 2 2 2 > 2 1 12 2 1 1 0.0 1.0 2 2 2 0.13 0.22
> 3 1 13 3 1 1 3 3 3 > 3 1 13 3 1 1 0.0 1.0 3 3 3 0.25 0.33
> 4 2 21 4 1 1 4 4 4 > 4 2 21 4 1 1 0.0 1.0 4 4 4 0.38 0.44
> 5 2 22 5 1 1 5 5 5 > 5 2 22 5 1 1 0.0 1.0 5 5 5 0.5 0.56
> 6 3 31 6 1 1 6 6 6 > 6 3 31 6 1 1 0.0 1.0 6 6 6 0.63 0.67
> 7 3 32 7 1 1 7 7 7 > 7 3 32 7 1 1 0.0 1.0 7 7 7 0.75 0.78
> 8 3 33 8 1 1 8 8 8 > 8 3 33 8 1 1 0.0 1.0 8 8 8 0.88 0.89
> rows (ordered): 8 > 9 4 41 9 1 1 0.0 1.0 9 9 9 1.0 1.0
> rows (ordered): 9
SELECT *, SELECT *,
ROW_NUMBER() OVER (ORDER BY CATEGORY) RN, RANK() OVER (ORDER BY CATEGORY) RK, DENSE_RANK() OVER (ORDER BY CATEGORY) DR ROW_NUMBER() OVER (ORDER BY CATEGORY) RN,
RANK() OVER (ORDER BY CATEGORY) RK,
DENSE_RANK() OVER (ORDER BY CATEGORY) DR,
ROUND(PERCENT_RANK() OVER (ORDER BY CATEGORY), 2) PR,
ROUND(CUME_DIST() OVER (ORDER BY CATEGORY), 2) CD
FROM TEST; FROM TEST;
> ID CATEGORY VALUE RN RK DR > ID CATEGORY VALUE RN RK DR PR CD
> -- -------- ----- -- -- -- > -- -------- ----- -- -- -- ---- ----
> 1 1 11 1 1 1 > 1 1 11 1 1 1 0.0 0.33
> 2 1 12 2 1 1 > 2 1 12 2 1 1 0.0 0.33
> 3 1 13 3 1 1 > 3 1 13 3 1 1 0.0 0.33
> 4 2 21 4 4 2 > 4 2 21 4 4 2 0.38 0.56
> 5 2 22 5 4 2 > 5 2 22 5 4 2 0.38 0.56
> 6 3 31 6 6 3 > 6 3 31 6 6 3 0.63 0.89
> 7 3 32 7 6 3 > 7 3 32 7 6 3 0.63 0.89
> 8 3 33 8 6 3 > 8 3 33 8 6 3 0.63 0.89
> rows (ordered): 8 > 9 4 41 9 9 4 1.0 1.0
> rows (ordered): 9
SELECT *, SELECT *,
ROW_NUMBER() OVER (PARTITION BY CATEGORY ORDER BY ID) RN, ROW_NUMBER() OVER (PARTITION BY CATEGORY ORDER BY ID) RN,
RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) RK, RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) RK,
DENSE_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) DR DENSE_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID) DR,
ROUND(PERCENT_RANK() OVER (PARTITION BY CATEGORY ORDER BY ID), 2) PR,
ROUND(CUME_DIST() OVER (PARTITION BY CATEGORY ORDER BY ID), 2) CD
FROM TEST; FROM TEST;
> ID CATEGORY VALUE RN RK DR > ID CATEGORY VALUE RN RK DR PR CD
> -- -------- ----- -- -- -- > -- -------- ----- -- -- -- --- ----
> 1 1 11 1 1 1 > 1 1 11 1 1 1 0.0 0.33
> 2 1 12 2 2 2 > 2 1 12 2 2 2 0.5 0.67
> 3 1 13 3 3 3 > 3 1 13 3 3 3 1.0 1.0
> 4 2 21 1 1 1 > 4 2 21 1 1 1 0.0 0.5
> 5 2 22 2 2 2 > 5 2 22 2 2 2 1.0 1.0
> 6 3 31 1 1 1 > 6 3 31 1 1 1 0.0 0.33
> 7 3 32 2 2 2 > 7 3 32 2 2 2 0.5 0.67
> 8 3 33 3 3 3 > 8 3 33 3 3 3 1.0 1.0
> rows (ordered): 8 > 9 4 41 1 1 1 0.0 1.0
> rows (ordered): 9
SELECT SELECT
ROW_NUMBER() OVER () RN, RANK() OVER () RK, DENSE_RANK() OVER () DR ROW_NUMBER() OVER () RN,
RANK() OVER () RK,
DENSE_RANK() OVER () DR,
PERCENT_RANK() OVER () PR,
CUME_DIST() OVER () CD
FROM TEST GROUP BY CATEGORY; FROM TEST GROUP BY CATEGORY;
> RN RK DR > RN RK DR PR CD
> -- -- -- > -- -- -- --- ---
> 1 1 1 > 1 1 1 0.0 1.0
> 2 1 1 > 2 1 1 0.0 1.0
> 3 1 1 > 3 1 1 0.0 1.0
> rows: 3 > 4 1 1 0.0 1.0
> rows: 4
DROP TABLE TEST; DROP TABLE TEST;
> ok > ok
......
...@@ -796,4 +796,4 @@ interior envelopes multilinestring multipoint packed exterior normalization awkw ...@@ -796,4 +796,4 @@ interior envelopes multilinestring multipoint packed exterior normalization awkw
xym normalizes coord setz xyzm geometrycollection multipolygon mixup rings polygons rejection finite xym normalizes coord setz xyzm geometrycollection multipolygon mixup rings polygons rejection finite
pointzm pointz pointm dimensionality redefine forum measures pointzm pointz pointm dimensionality redefine forum measures
mpg casted pzm mls constrained subtypes complains mpg casted pzm mls constrained subtypes complains
ranks rno dro rko precede ranks rno dro rko precede cume
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论