提交 33e29021 authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov

Add PERCENTILE_CONT() inverse distribution function

上级 3b8fb1b7
...@@ -3674,6 +3674,22 @@ Aggregates are only allowed in select statements. ...@@ -3674,6 +3674,22 @@ Aggregates are only allowed in select statements.
VAR_SAMP(X) VAR_SAMP(X)
" "
"Functions (Aggregate)","PERCENTILE_CONT","
PERCENTILE_CONT(numeric) WITHIN GROUP (ORDER BY value [ASC|DESC])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the interpolated percentile of values from the group.
Interpolation is only supported for numeric, date-time, and interval data types.
Argument must be between 0 and 1 inclusive.
Argument must be the same for all rows in the same group.
If argument is NULL, the result is NULL.
NULL values are ignored in the calculation.
If no rows are selected, the result is NULL.
Aggregates are only allowed in select statements.
","
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY V)
"
"Functions (Aggregate)","PERCENTILE_DISC"," "Functions (Aggregate)","PERCENTILE_DISC","
PERCENTILE_DISC(numeric) WITHIN GROUP (ORDER BY value [ASC|DESC]) PERCENTILE_DISC(numeric) WITHIN GROUP (ORDER BY value [ASC|DESC])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification] [FILTER (WHERE expression)] [OVER windowNameOrSpecification]
......
...@@ -3084,6 +3084,7 @@ public class Parser { ...@@ -3084,6 +3084,7 @@ public class Parser {
} }
break; break;
} }
case PERCENTILE_CONT:
case PERCENTILE_DISC: { case PERCENTILE_DISC: {
Expression num = readExpression(); Expression num = readExpression();
read(CLOSE_PAREN); read(CLOSE_PAREN);
......
...@@ -111,6 +111,7 @@ public class Aggregate extends AbstractAggregate { ...@@ -111,6 +111,7 @@ public class Aggregate extends AbstractAggregate {
addAggregate("HISTOGRAM", AggregateType.HISTOGRAM); addAggregate("HISTOGRAM", AggregateType.HISTOGRAM);
addAggregate("BIT_OR", AggregateType.BIT_OR); addAggregate("BIT_OR", AggregateType.BIT_OR);
addAggregate("BIT_AND", AggregateType.BIT_AND); addAggregate("BIT_AND", AggregateType.BIT_AND);
addAggregate("PERCENTILE_CONT", AggregateType.PERCENTILE_CONT);
addAggregate("PERCENTILE_DISC", AggregateType.PERCENTILE_DISC); addAggregate("PERCENTILE_DISC", AggregateType.PERCENTILE_DISC);
addAggregate("MEDIAN", AggregateType.MEDIAN); addAggregate("MEDIAN", AggregateType.MEDIAN);
addAggregate("ARRAY_AGG", AggregateType.ARRAY_AGG); addAggregate("ARRAY_AGG", AggregateType.ARRAY_AGG);
...@@ -199,6 +200,7 @@ public class Aggregate extends AbstractAggregate { ...@@ -199,6 +200,7 @@ public class Aggregate extends AbstractAggregate {
v = updateCollecting(session, v, remembered); v = updateCollecting(session, v, remembered);
} }
break; break;
case PERCENTILE_CONT:
case PERCENTILE_DISC: case PERCENTILE_DISC:
((AggregateDataCollecting) data).setSharedArgument(v); ((AggregateDataCollecting) data).setSharedArgument(v);
v = remembered != null ? remembered[1] : orderByList.get(0).expression.getValue(session); v = remembered != null ? remembered[1] : orderByList.get(0).expression.getValue(session);
...@@ -314,6 +316,7 @@ public class Aggregate extends AbstractAggregate { ...@@ -314,6 +316,7 @@ public class Aggregate extends AbstractAggregate {
} }
return v; return v;
} }
case PERCENTILE_CONT:
case PERCENTILE_DISC: { case PERCENTILE_DISC: {
Value v = on.getValue(session); Value v = on.getValue(session);
if (v == ValueNull.INSTANCE) { if (v == ValueNull.INSTANCE) {
...@@ -322,9 +325,10 @@ public class Aggregate extends AbstractAggregate { ...@@ -322,9 +325,10 @@ public class Aggregate extends AbstractAggregate {
double arg = v.getDouble(); double arg = v.getDouble();
if (arg >= 0d && arg <= 1d) { if (arg >= 0d && arg <= 1d) {
return Percentile.getFromIndex(session, orderByList.get(0).expression, type.getValueType(), return Percentile.getFromIndex(session, orderByList.get(0).expression, type.getValueType(),
orderByList, arg, false); orderByList, arg, aggregateType == AggregateType.PERCENTILE_CONT);
} else { } else {
throw DbException.getInvalidValueException("PERCENTILE_DISC argument", arg); throw DbException.getInvalidValueException(aggregateType == AggregateType.PERCENTILE_CONT ?
"PERCENTILE_CONT argument" : "PERCENTILE_DISC argument", arg);
} }
} }
case MEDIAN: case MEDIAN:
...@@ -387,6 +391,7 @@ public class Aggregate extends AbstractAggregate { ...@@ -387,6 +391,7 @@ public class Aggregate extends AbstractAggregate {
} }
return ValueArray.get(array); return ValueArray.get(array);
} }
case PERCENTILE_CONT:
case PERCENTILE_DISC: { case PERCENTILE_DISC: {
AggregateDataCollecting collectingData = (AggregateDataCollecting) data; AggregateDataCollecting collectingData = (AggregateDataCollecting) data;
Value[] array = collectingData.getArray(); Value[] array = collectingData.getArray();
...@@ -399,9 +404,11 @@ public class Aggregate extends AbstractAggregate { ...@@ -399,9 +404,11 @@ public class Aggregate extends AbstractAggregate {
} }
double arg = v.getDouble(); double arg = v.getDouble();
if (arg >= 0d && arg <= 1d) { if (arg >= 0d && arg <= 1d) {
return Percentile.getValue(session.getDatabase(), array, type.getValueType(), orderByList, arg, false); return Percentile.getValue(session.getDatabase(), array, type.getValueType(), orderByList, arg,
aggregateType == AggregateType.PERCENTILE_CONT);
} else { } else {
throw DbException.getInvalidValueException("PERCENTILE_DISC argument", arg); throw DbException.getInvalidValueException(aggregateType == AggregateType.PERCENTILE_CONT ?
"PERCENTILE_CONT argument" : "PERCENTILE_DISC argument", arg);
} }
} }
case MEDIAN: { case MEDIAN: {
...@@ -584,6 +591,7 @@ public class Aggregate extends AbstractAggregate { ...@@ -584,6 +591,7 @@ public class Aggregate extends AbstractAggregate {
case MAX: case MAX:
case MEDIAN: case MEDIAN:
break; break;
case PERCENTILE_CONT:
case PERCENTILE_DISC: case PERCENTILE_DISC:
case MODE: case MODE:
type = orderByList.get(0).expression.getType(); type = orderByList.get(0).expression.getType();
...@@ -711,6 +719,9 @@ public class Aggregate extends AbstractAggregate { ...@@ -711,6 +719,9 @@ public class Aggregate extends AbstractAggregate {
case BIT_OR: case BIT_OR:
text = "BIT_OR"; text = "BIT_OR";
break; break;
case PERCENTILE_CONT:
text = "PERCENTILE_CONT";
break;
case PERCENTILE_DISC: case PERCENTILE_DISC:
text = "PERCENTILE_DISC"; text = "PERCENTILE_DISC";
break; break;
...@@ -785,6 +796,7 @@ public class Aggregate extends AbstractAggregate { ...@@ -785,6 +796,7 @@ public class Aggregate extends AbstractAggregate {
case MAX: case MAX:
Index index = getMinMaxColumnIndex(); Index index = getMinMaxColumnIndex();
return index != null; return index != null;
case PERCENTILE_CONT:
case PERCENTILE_DISC: case PERCENTILE_DISC:
return on.isConstant() && Percentile.getColumnIndex(orderByList.get(0).expression) != null; return on.isConstant() && Percentile.getColumnIndex(orderByList.get(0).expression) != null;
case MEDIAN: case MEDIAN:
......
...@@ -34,6 +34,7 @@ abstract class AggregateData { ...@@ -34,6 +34,7 @@ abstract class AggregateData {
break; break;
case GROUP_CONCAT: case GROUP_CONCAT:
case ARRAY_AGG: case ARRAY_AGG:
case PERCENTILE_CONT:
case PERCENTILE_DISC: case PERCENTILE_DISC:
case MEDIAN: case MEDIAN:
break; break;
......
...@@ -95,6 +95,11 @@ public enum AggregateType { ...@@ -95,6 +95,11 @@ public enum AggregateType {
*/ */
HISTOGRAM, HISTOGRAM,
/**
* The aggregate type for PERCENTILE_CONT(expression).
*/
PERCENTILE_CONT,
/** /**
* The aggregate type for PERCENTILE_DISC(expression). * The aggregate type for PERCENTILE_DISC(expression).
*/ */
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
package org.h2.expression.aggregate; package org.h2.expression.aggregate;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
...@@ -42,7 +43,7 @@ import org.h2.value.ValueTimestamp; ...@@ -42,7 +43,7 @@ import org.h2.value.ValueTimestamp;
import org.h2.value.ValueTimestampTimeZone; import org.h2.value.ValueTimestampTimeZone;
/** /**
* PERCENTILE_DISC and MEDIAN inverse distribution functions. * PERCENTILE_CONT, PERCENTILE_DISC, and MEDIAN inverse distribution functions.
*/ */
final class Percentile { final class Percentile {
...@@ -135,7 +136,7 @@ final class Percentile { ...@@ -135,7 +136,7 @@ final class Percentile {
if (!interpolate) { if (!interpolate) {
return v.convertTo(dataType); return v.convertTo(dataType);
} }
return getMedian(v, array[rowIdx2], dataType, database.getMode(), compareMode); return interpolate(v, array[rowIdx2], factor, dataType, database.getMode(), compareMode);
} }
/** /**
...@@ -243,12 +244,13 @@ final class Percentile { ...@@ -243,12 +244,13 @@ final class Percentile {
v = v2; v = v2;
v2 = t; v2 = t;
} }
return getMedian(v, v2, dataType, database.getMode(), database.getCompareMode()); return interpolate(v, v2, factor, dataType, database.getMode(), database.getCompareMode());
} }
return v; return v;
} }
private static Value getMedian(Value v0, Value v1, int dataType, Mode databaseMode, CompareMode compareMode) { private static Value interpolate(Value v0, Value v1, double factor, int dataType, Mode databaseMode,
CompareMode compareMode) {
if (v0.compareTo(v1, databaseMode, compareMode) == 0) { if (v0.compareTo(v1, databaseMode, compareMode) == 0) {
return v0.convertTo(dataType); return v0.convertTo(dataType);
} }
...@@ -256,59 +258,71 @@ final class Percentile { ...@@ -256,59 +258,71 @@ final class Percentile {
case Value.BYTE: case Value.BYTE:
case Value.SHORT: case Value.SHORT:
case Value.INT: case Value.INT:
return ValueInt.get((v0.getInt() + v1.getInt()) / 2).convertTo(dataType); return ValueInt.get((int) (v0.getInt() * (1 - factor) + v1.getInt() * factor)).convertTo(dataType);
case Value.LONG: case Value.LONG:
return ValueLong.get((v0.getLong() + v1.getLong()) / 2); return ValueLong
.get(interpolateDecimal(BigDecimal.valueOf(v0.getLong()), BigDecimal.valueOf(v1.getLong()), factor)
.longValue());
case Value.DECIMAL: case Value.DECIMAL:
return ValueDecimal.get(v0.getBigDecimal().add(v1.getBigDecimal()).divide(BigDecimal.valueOf(2))); return ValueDecimal.get(interpolateDecimal(v0.getBigDecimal(), v1.getBigDecimal(), factor));
case Value.FLOAT: case Value.FLOAT:
return ValueFloat.get((v0.getFloat() + v1.getFloat()) / 2); return ValueFloat.get(
interpolateDecimal(BigDecimal.valueOf(v0.getFloat()), BigDecimal.valueOf(v1.getFloat()), factor)
.floatValue());
case Value.DOUBLE: case Value.DOUBLE:
return ValueDouble.get((v0.getFloat() + v1.getDouble()) / 2); return ValueDouble.get(
interpolateDecimal(BigDecimal.valueOf(v0.getDouble()), BigDecimal.valueOf(v1.getDouble()), factor)
.doubleValue());
case Value.TIME: { case Value.TIME: {
ValueTime t0 = (ValueTime) v0.convertTo(Value.TIME), t1 = (ValueTime) v1.convertTo(Value.TIME); ValueTime t0 = (ValueTime) v0.convertTo(Value.TIME), t1 = (ValueTime) v1.convertTo(Value.TIME);
return ValueTime.fromNanos((t0.getNanos() + t1.getNanos()) / 2); BigDecimal n0 = BigDecimal.valueOf(t0.getNanos());
BigDecimal n1 = BigDecimal.valueOf(t1.getNanos());
return ValueTime.fromNanos(interpolateDecimal(n0, n1, factor).longValue());
} }
case Value.DATE: { case Value.DATE: {
ValueDate d0 = (ValueDate) v0.convertTo(Value.DATE), d1 = (ValueDate) v1.convertTo(Value.DATE); ValueDate d0 = (ValueDate) v0.convertTo(Value.DATE), d1 = (ValueDate) v1.convertTo(Value.DATE);
BigDecimal a0 = BigDecimal.valueOf(DateTimeUtils.absoluteDayFromDateValue(d0.getDateValue()));
BigDecimal a1 = BigDecimal.valueOf(DateTimeUtils.absoluteDayFromDateValue(d1.getDateValue()));
return ValueDate.fromDateValue( return ValueDate.fromDateValue(
DateTimeUtils.dateValueFromAbsoluteDay((DateTimeUtils.absoluteDayFromDateValue(d0.getDateValue()) DateTimeUtils.dateValueFromAbsoluteDay(interpolateDecimal(a0, a1, factor).longValue()));
+ DateTimeUtils.absoluteDayFromDateValue(d1.getDateValue())) / 2));
} }
case Value.TIMESTAMP: { case Value.TIMESTAMP: {
ValueTimestamp ts0 = (ValueTimestamp) v0.convertTo(Value.TIMESTAMP), ValueTimestamp ts0 = (ValueTimestamp) v0.convertTo(Value.TIMESTAMP),
ts1 = (ValueTimestamp) v1.convertTo(Value.TIMESTAMP); ts1 = (ValueTimestamp) v1.convertTo(Value.TIMESTAMP);
long dateSum = DateTimeUtils.absoluteDayFromDateValue(ts0.getDateValue()) BigDecimal a0 = timestampToDecimal(ts0.getDateValue(), ts0.getTimeNanos());
+ DateTimeUtils.absoluteDayFromDateValue(ts1.getDateValue()); BigDecimal a1 = timestampToDecimal(ts1.getDateValue(), ts1.getTimeNanos());
long nanos = (ts0.getTimeNanos() + ts1.getTimeNanos()) / 2; BigInteger[] dr = interpolateDecimal(a0, a1, factor).toBigInteger()
if ((dateSum & 1) != 0) { .divideAndRemainder(IntervalUtils.NANOS_PER_DAY_BI);
nanos += DateTimeUtils.NANOS_PER_DAY / 2; long absoluteDay = dr[0].longValue();
if (nanos >= DateTimeUtils.NANOS_PER_DAY) { long timeNanos = dr[1].longValue();
nanos -= DateTimeUtils.NANOS_PER_DAY; if (timeNanos < 0) {
dateSum++; timeNanos += DateTimeUtils.NANOS_PER_DAY;
} absoluteDay--;
} }
return ValueTimestamp.fromDateValueAndNanos(DateTimeUtils.dateValueFromAbsoluteDay(dateSum / 2), nanos); return ValueTimestamp.fromDateValueAndNanos(
DateTimeUtils.dateValueFromAbsoluteDay(absoluteDay), timeNanos);
} }
case Value.TIMESTAMP_TZ: { case Value.TIMESTAMP_TZ: {
ValueTimestampTimeZone ts0 = (ValueTimestampTimeZone) v0.convertTo(Value.TIMESTAMP_TZ), ValueTimestampTimeZone ts0 = (ValueTimestampTimeZone) v0.convertTo(Value.TIMESTAMP_TZ),
ts1 = (ValueTimestampTimeZone) v1.convertTo(Value.TIMESTAMP_TZ); ts1 = (ValueTimestampTimeZone) v1.convertTo(Value.TIMESTAMP_TZ);
long dateSum = DateTimeUtils.absoluteDayFromDateValue(ts0.getDateValue()) BigDecimal a0 = timestampToDecimal(ts0.getDateValue(), ts0.getTimeNanos());
+ DateTimeUtils.absoluteDayFromDateValue(ts1.getDateValue()); BigDecimal a1 = timestampToDecimal(ts1.getDateValue(), ts1.getTimeNanos());
long nanos = (ts0.getTimeNanos() + ts1.getTimeNanos()) / 2; double offset = ts0.getTimeZoneOffsetMins() * (1 - factor) + ts1.getTimeZoneOffsetMins() * factor;
int offset = ts0.getTimeZoneOffsetMins() + ts1.getTimeZoneOffsetMins(); short sOffset = (short) offset;
if ((dateSum & 1) != 0) { BigDecimal bd = interpolateDecimal(a0, a1, factor);
nanos += DateTimeUtils.NANOS_PER_DAY / 2; if (offset != sOffset) {
} bd = bd.add(BigDecimal.valueOf(offset - sOffset)
if ((offset & 1) != 0) { .multiply(BigDecimal.valueOf(DateTimeUtils.NANOS_PER_MINUTE)));
nanos += 30_000_000_000L;
} }
if (nanos >= DateTimeUtils.NANOS_PER_DAY) { BigInteger[] dr = bd.toBigInteger().divideAndRemainder(IntervalUtils.NANOS_PER_DAY_BI);
nanos -= DateTimeUtils.NANOS_PER_DAY; long absoluteDay = dr[0].longValue();
dateSum++; long timeNanos = dr[1].longValue();
if (timeNanos < 0) {
timeNanos += DateTimeUtils.NANOS_PER_DAY;
absoluteDay--;
} }
return ValueTimestampTimeZone.fromDateValueAndNanos(DateTimeUtils.dateValueFromAbsoluteDay(dateSum / 2), return ValueTimestampTimeZone.fromDateValueAndNanos(DateTimeUtils.dateValueFromAbsoluteDay(absoluteDay),
nanos, (short) (offset / 2)); timeNanos, sOffset);
} }
case Value.INTERVAL_YEAR: case Value.INTERVAL_YEAR:
case Value.INTERVAL_MONTH: case Value.INTERVAL_MONTH:
...@@ -324,14 +338,24 @@ final class Percentile { ...@@ -324,14 +338,24 @@ final class Percentile {
case Value.INTERVAL_HOUR_TO_SECOND: case Value.INTERVAL_HOUR_TO_SECOND:
case Value.INTERVAL_MINUTE_TO_SECOND: case Value.INTERVAL_MINUTE_TO_SECOND:
return IntervalUtils.intervalFromAbsolute(IntervalQualifier.valueOf(dataType - Value.INTERVAL_YEAR), return IntervalUtils.intervalFromAbsolute(IntervalQualifier.valueOf(dataType - Value.INTERVAL_YEAR),
IntervalUtils.intervalToAbsolute((ValueInterval) v0) interpolateDecimal(new BigDecimal(IntervalUtils.intervalToAbsolute((ValueInterval) v0)),
.add(IntervalUtils.intervalToAbsolute((ValueInterval) v1)).shiftRight(1)); new BigDecimal(IntervalUtils.intervalToAbsolute((ValueInterval) v1)), factor)
.toBigInteger());
default: default:
// Just return first // Use the same rules as PERCENTILE_DISC
return v0.convertTo(dataType); return (factor > 0.5d ? v1 : v0).convertTo(dataType);
} }
} }
private static BigDecimal timestampToDecimal(long dateValue, long timeNanos) {
return new BigDecimal(BigInteger.valueOf(DateTimeUtils.absoluteDayFromDateValue(dateValue))
.multiply(IntervalUtils.NANOS_PER_DAY_BI).add(BigInteger.valueOf(timeNanos)));
}
private static BigDecimal interpolateDecimal(BigDecimal d0, BigDecimal d1, double factor) {
return d0.multiply(BigDecimal.valueOf(1 - factor)).add(d1.multiply(BigDecimal.valueOf(factor)));
}
private Percentile() { private Percentile() {
} }
......
...@@ -573,6 +573,12 @@ insert into test values ('2000-01-20 20:00:00'), ('2000-01-21 20:00:00'); ...@@ -573,6 +573,12 @@ insert into test values ('2000-01-20 20:00:00'), ('2000-01-21 20:00:00');
select median(v) from test; select median(v) from test;
>> 2000-01-21 08:00:00 >> 2000-01-21 08:00:00
insert into test values ('-2000-01-10 10:00:00'), ('-2000-01-10 10:00:01');
> update count: 2
select percentile_cont(0.16) within group (order by v) from test;
>> -2000-01-10 10:00:00.48
drop table test; drop table test;
> ok > ok
...@@ -609,6 +615,15 @@ insert into test values ('2000-01-20 20:00:00+10:15'), ('2000-01-21 20:00:00-09' ...@@ -609,6 +615,15 @@ insert into test values ('2000-01-20 20:00:00+10:15'), ('2000-01-21 20:00:00-09'
select median(v) from test; select median(v) from test;
>> 2000-01-21 08:00:30+00:37 >> 2000-01-21 08:00:30+00:37
delete from test;
> update count: 2
insert into test values ('-2000-01-20 20:00:00+10:15'), ('-2000-01-21 20:00:00-09');
> update count: 2
select median(v) from test;
>> -2000-01-21 08:00:30+00:37
drop table test; drop table test;
> ok > ok
...@@ -781,6 +796,20 @@ insert into test values (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), ...@@ -781,6 +796,20 @@ insert into test values (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7),
(2, 10), (2, 20), (2, 30), (2, 100); (2, 10), (2, 20), (2, 30), (2, 100);
> update count: 14 > update count: 14
select
percentile_cont(0.05) within group (order by v) c05a,
percentile_cont(0.05) within group (order by v desc) c05d,
percentile_cont(0.5) within group (order by v) c50,
percentile_cont(0.5) within group (order by v desc) c50d,
percentile_cont(0.95) within group (order by v) c95a,
percentile_cont(0.95) within group (order by v desc) c95d,
g from test group by g;
> C05A C05D C50 C50D C95A C95D G
> ---- ---- --- ---- ---- ---- -
> 1 9 5 5 9 1 1
> 11 89 25 25 89 11 2
> rows: 2
select select
percentile_disc(0.05) within group (order by v) d05a, percentile_disc(0.05) within group (order by v) d05a,
percentile_disc(0.05) within group (order by v desc) d05d, percentile_disc(0.05) within group (order by v desc) d05d,
......
...@@ -806,4 +806,4 @@ econd irst bcef ordinality nord unnest ...@@ -806,4 +806,4 @@ econd irst bcef ordinality nord unnest
analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan
corrupts splitted disruption unintentional octets preconditions predicates subq objectweb insn opcodes corrupts splitted disruption unintentional octets preconditions predicates subq objectweb insn opcodes
preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions subclause ftbl rgr preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions subclause ftbl rgr
presorted inclusion contexts aax mwd percentile interpolate mwa presorted inclusion contexts aax mwd percentile cont interpolate mwa
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论