提交 33e29021 authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov

Add PERCENTILE_CONT() inverse distribution function

上级 3b8fb1b7
......@@ -3674,6 +3674,22 @@ Aggregates are only allowed in select statements.
VAR_SAMP(X)
"
"Functions (Aggregate)","PERCENTILE_CONT","
PERCENTILE_CONT(numeric) WITHIN GROUP (ORDER BY value [ASC|DESC])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
","
Returns the percentile of values from the group with interpolation.
Interpolation is only supported for numeric, date-time, and interval data types.
Argument must be between 0 and 1 inclusive.
Argument must be the same for all rows in the same group.
If argument is NULL, the result is NULL.
NULL values are ignored in the calculation.
If no rows are selected, the result is NULL.
Aggregates are only allowed in select statements.
","
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY V)
"
"Functions (Aggregate)","PERCENTILE_DISC","
PERCENTILE_DISC(numeric) WITHIN GROUP (ORDER BY value [ASC|DESC])
[FILTER (WHERE expression)] [OVER windowNameOrSpecification]
......
......@@ -3084,6 +3084,7 @@ public class Parser {
}
break;
}
case PERCENTILE_CONT:
case PERCENTILE_DISC: {
Expression num = readExpression();
read(CLOSE_PAREN);
......
......@@ -111,6 +111,7 @@ public class Aggregate extends AbstractAggregate {
addAggregate("HISTOGRAM", AggregateType.HISTOGRAM);
addAggregate("BIT_OR", AggregateType.BIT_OR);
addAggregate("BIT_AND", AggregateType.BIT_AND);
addAggregate("PERCENTILE_CONT", AggregateType.PERCENTILE_CONT);
addAggregate("PERCENTILE_DISC", AggregateType.PERCENTILE_DISC);
addAggregate("MEDIAN", AggregateType.MEDIAN);
addAggregate("ARRAY_AGG", AggregateType.ARRAY_AGG);
......@@ -199,6 +200,7 @@ public class Aggregate extends AbstractAggregate {
v = updateCollecting(session, v, remembered);
}
break;
case PERCENTILE_CONT:
case PERCENTILE_DISC:
((AggregateDataCollecting) data).setSharedArgument(v);
v = remembered != null ? remembered[1] : orderByList.get(0).expression.getValue(session);
......@@ -314,6 +316,7 @@ public class Aggregate extends AbstractAggregate {
}
return v;
}
case PERCENTILE_CONT:
case PERCENTILE_DISC: {
Value v = on.getValue(session);
if (v == ValueNull.INSTANCE) {
......@@ -322,9 +325,10 @@ public class Aggregate extends AbstractAggregate {
double arg = v.getDouble();
if (arg >= 0d && arg <= 1d) {
return Percentile.getFromIndex(session, orderByList.get(0).expression, type.getValueType(),
orderByList, arg, false);
orderByList, arg, aggregateType == AggregateType.PERCENTILE_CONT);
} else {
throw DbException.getInvalidValueException("PERCENTILE_DISC argument", arg);
throw DbException.getInvalidValueException(aggregateType == AggregateType.PERCENTILE_CONT ?
"PERCENTILE_CONT argument" : "PERCENTILE_DISC argument", arg);
}
}
case MEDIAN:
......@@ -387,6 +391,7 @@ public class Aggregate extends AbstractAggregate {
}
return ValueArray.get(array);
}
case PERCENTILE_CONT:
case PERCENTILE_DISC: {
AggregateDataCollecting collectingData = (AggregateDataCollecting) data;
Value[] array = collectingData.getArray();
......@@ -399,9 +404,11 @@ public class Aggregate extends AbstractAggregate {
}
double arg = v.getDouble();
if (arg >= 0d && arg <= 1d) {
return Percentile.getValue(session.getDatabase(), array, type.getValueType(), orderByList, arg, false);
return Percentile.getValue(session.getDatabase(), array, type.getValueType(), orderByList, arg,
aggregateType == AggregateType.PERCENTILE_CONT);
} else {
throw DbException.getInvalidValueException("PERCENTILE_DISC argument", arg);
throw DbException.getInvalidValueException(aggregateType == AggregateType.PERCENTILE_CONT ?
"PERCENTILE_CONT argument" : "PERCENTILE_DISC argument", arg);
}
}
case MEDIAN: {
......@@ -584,6 +591,7 @@ public class Aggregate extends AbstractAggregate {
case MAX:
case MEDIAN:
break;
case PERCENTILE_CONT:
case PERCENTILE_DISC:
case MODE:
type = orderByList.get(0).expression.getType();
......@@ -711,6 +719,9 @@ public class Aggregate extends AbstractAggregate {
case BIT_OR:
text = "BIT_OR";
break;
case PERCENTILE_CONT:
text = "PERCENTILE_CONT";
break;
case PERCENTILE_DISC:
text = "PERCENTILE_DISC";
break;
......@@ -785,6 +796,7 @@ public class Aggregate extends AbstractAggregate {
case MAX:
Index index = getMinMaxColumnIndex();
return index != null;
case PERCENTILE_CONT:
case PERCENTILE_DISC:
return on.isConstant() && Percentile.getColumnIndex(orderByList.get(0).expression) != null;
case MEDIAN:
......
......@@ -34,6 +34,7 @@ abstract class AggregateData {
break;
case GROUP_CONCAT:
case ARRAY_AGG:
case PERCENTILE_CONT:
case PERCENTILE_DISC:
case MEDIAN:
break;
......
......@@ -95,6 +95,11 @@ public enum AggregateType {
*/
HISTOGRAM,
/**
* The aggregate type for PERCENTILE_CONT(expression).
*/
PERCENTILE_CONT,
/**
* The aggregate type for PERCENTILE_DISC(expression).
*/
......
......@@ -6,6 +6,7 @@
package org.h2.expression.aggregate;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
......@@ -42,7 +43,7 @@ import org.h2.value.ValueTimestamp;
import org.h2.value.ValueTimestampTimeZone;
/**
* PERCENTILE_DISC and MEDIAN inverse distribution functions.
* PERCENTILE_CONT, PERCENTILE_DISC, and MEDIAN inverse distribution functions.
*/
final class Percentile {
......@@ -135,7 +136,7 @@ final class Percentile {
if (!interpolate) {
return v.convertTo(dataType);
}
return getMedian(v, array[rowIdx2], dataType, database.getMode(), compareMode);
return interpolate(v, array[rowIdx2], factor, dataType, database.getMode(), compareMode);
}
/**
......@@ -243,12 +244,13 @@ final class Percentile {
v = v2;
v2 = t;
}
return getMedian(v, v2, dataType, database.getMode(), database.getCompareMode());
return interpolate(v, v2, factor, dataType, database.getMode(), database.getCompareMode());
}
return v;
}
private static Value getMedian(Value v0, Value v1, int dataType, Mode databaseMode, CompareMode compareMode) {
private static Value interpolate(Value v0, Value v1, double factor, int dataType, Mode databaseMode,
CompareMode compareMode) {
if (v0.compareTo(v1, databaseMode, compareMode) == 0) {
return v0.convertTo(dataType);
}
......@@ -256,59 +258,71 @@ final class Percentile {
case Value.BYTE:
case Value.SHORT:
case Value.INT:
return ValueInt.get((v0.getInt() + v1.getInt()) / 2).convertTo(dataType);
return ValueInt.get((int) (v0.getInt() * (1 - factor) + v1.getInt() * factor)).convertTo(dataType);
case Value.LONG:
return ValueLong.get((v0.getLong() + v1.getLong()) / 2);
return ValueLong
.get(interpolateDecimal(BigDecimal.valueOf(v0.getLong()), BigDecimal.valueOf(v1.getLong()), factor)
.longValue());
case Value.DECIMAL:
return ValueDecimal.get(v0.getBigDecimal().add(v1.getBigDecimal()).divide(BigDecimal.valueOf(2)));
return ValueDecimal.get(interpolateDecimal(v0.getBigDecimal(), v1.getBigDecimal(), factor));
case Value.FLOAT:
return ValueFloat.get((v0.getFloat() + v1.getFloat()) / 2);
return ValueFloat.get(
interpolateDecimal(BigDecimal.valueOf(v0.getFloat()), BigDecimal.valueOf(v1.getFloat()), factor)
.floatValue());
case Value.DOUBLE:
return ValueDouble.get((v0.getFloat() + v1.getDouble()) / 2);
return ValueDouble.get(
interpolateDecimal(BigDecimal.valueOf(v0.getDouble()), BigDecimal.valueOf(v1.getDouble()), factor)
.doubleValue());
case Value.TIME: {
ValueTime t0 = (ValueTime) v0.convertTo(Value.TIME), t1 = (ValueTime) v1.convertTo(Value.TIME);
return ValueTime.fromNanos((t0.getNanos() + t1.getNanos()) / 2);
BigDecimal n0 = BigDecimal.valueOf(t0.getNanos());
BigDecimal n1 = BigDecimal.valueOf(t1.getNanos());
return ValueTime.fromNanos(interpolateDecimal(n0, n1, factor).longValue());
}
case Value.DATE: {
ValueDate d0 = (ValueDate) v0.convertTo(Value.DATE), d1 = (ValueDate) v1.convertTo(Value.DATE);
BigDecimal a0 = BigDecimal.valueOf(DateTimeUtils.absoluteDayFromDateValue(d0.getDateValue()));
BigDecimal a1 = BigDecimal.valueOf(DateTimeUtils.absoluteDayFromDateValue(d1.getDateValue()));
return ValueDate.fromDateValue(
DateTimeUtils.dateValueFromAbsoluteDay((DateTimeUtils.absoluteDayFromDateValue(d0.getDateValue())
+ DateTimeUtils.absoluteDayFromDateValue(d1.getDateValue())) / 2));
DateTimeUtils.dateValueFromAbsoluteDay(interpolateDecimal(a0, a1, factor).longValue()));
}
case Value.TIMESTAMP: {
ValueTimestamp ts0 = (ValueTimestamp) v0.convertTo(Value.TIMESTAMP),
ts1 = (ValueTimestamp) v1.convertTo(Value.TIMESTAMP);
long dateSum = DateTimeUtils.absoluteDayFromDateValue(ts0.getDateValue())
+ DateTimeUtils.absoluteDayFromDateValue(ts1.getDateValue());
long nanos = (ts0.getTimeNanos() + ts1.getTimeNanos()) / 2;
if ((dateSum & 1) != 0) {
nanos += DateTimeUtils.NANOS_PER_DAY / 2;
if (nanos >= DateTimeUtils.NANOS_PER_DAY) {
nanos -= DateTimeUtils.NANOS_PER_DAY;
dateSum++;
}
BigDecimal a0 = timestampToDecimal(ts0.getDateValue(), ts0.getTimeNanos());
BigDecimal a1 = timestampToDecimal(ts1.getDateValue(), ts1.getTimeNanos());
BigInteger[] dr = interpolateDecimal(a0, a1, factor).toBigInteger()
.divideAndRemainder(IntervalUtils.NANOS_PER_DAY_BI);
long absoluteDay = dr[0].longValue();
long timeNanos = dr[1].longValue();
if (timeNanos < 0) {
timeNanos += DateTimeUtils.NANOS_PER_DAY;
absoluteDay--;
}
return ValueTimestamp.fromDateValueAndNanos(DateTimeUtils.dateValueFromAbsoluteDay(dateSum / 2), nanos);
return ValueTimestamp.fromDateValueAndNanos(
DateTimeUtils.dateValueFromAbsoluteDay(absoluteDay), timeNanos);
}
case Value.TIMESTAMP_TZ: {
ValueTimestampTimeZone ts0 = (ValueTimestampTimeZone) v0.convertTo(Value.TIMESTAMP_TZ),
ts1 = (ValueTimestampTimeZone) v1.convertTo(Value.TIMESTAMP_TZ);
long dateSum = DateTimeUtils.absoluteDayFromDateValue(ts0.getDateValue())
+ DateTimeUtils.absoluteDayFromDateValue(ts1.getDateValue());
long nanos = (ts0.getTimeNanos() + ts1.getTimeNanos()) / 2;
int offset = ts0.getTimeZoneOffsetMins() + ts1.getTimeZoneOffsetMins();
if ((dateSum & 1) != 0) {
nanos += DateTimeUtils.NANOS_PER_DAY / 2;
}
if ((offset & 1) != 0) {
nanos += 30_000_000_000L;
BigDecimal a0 = timestampToDecimal(ts0.getDateValue(), ts0.getTimeNanos());
BigDecimal a1 = timestampToDecimal(ts1.getDateValue(), ts1.getTimeNanos());
double offset = ts0.getTimeZoneOffsetMins() * (1 - factor) + ts1.getTimeZoneOffsetMins() * factor;
short sOffset = (short) offset;
BigDecimal bd = interpolateDecimal(a0, a1, factor);
if (offset != sOffset) {
bd = bd.add(BigDecimal.valueOf(offset - sOffset)
.multiply(BigDecimal.valueOf(DateTimeUtils.NANOS_PER_MINUTE)));
}
if (nanos >= DateTimeUtils.NANOS_PER_DAY) {
nanos -= DateTimeUtils.NANOS_PER_DAY;
dateSum++;
BigInteger[] dr = bd.toBigInteger().divideAndRemainder(IntervalUtils.NANOS_PER_DAY_BI);
long absoluteDay = dr[0].longValue();
long timeNanos = dr[1].longValue();
if (timeNanos < 0) {
timeNanos += DateTimeUtils.NANOS_PER_DAY;
absoluteDay--;
}
return ValueTimestampTimeZone.fromDateValueAndNanos(DateTimeUtils.dateValueFromAbsoluteDay(dateSum / 2),
nanos, (short) (offset / 2));
return ValueTimestampTimeZone.fromDateValueAndNanos(DateTimeUtils.dateValueFromAbsoluteDay(absoluteDay),
timeNanos, sOffset);
}
case Value.INTERVAL_YEAR:
case Value.INTERVAL_MONTH:
......@@ -324,14 +338,24 @@ final class Percentile {
case Value.INTERVAL_HOUR_TO_SECOND:
case Value.INTERVAL_MINUTE_TO_SECOND:
return IntervalUtils.intervalFromAbsolute(IntervalQualifier.valueOf(dataType - Value.INTERVAL_YEAR),
IntervalUtils.intervalToAbsolute((ValueInterval) v0)
.add(IntervalUtils.intervalToAbsolute((ValueInterval) v1)).shiftRight(1));
interpolateDecimal(new BigDecimal(IntervalUtils.intervalToAbsolute((ValueInterval) v0)),
new BigDecimal(IntervalUtils.intervalToAbsolute((ValueInterval) v1)), factor)
.toBigInteger());
default:
// Just return first
return v0.convertTo(dataType);
// Use the same rules as PERCENTILE_DISC
return (factor > 0.5d ? v1 : v0).convertTo(dataType);
}
}
/**
 * Converts a timestamp, given as an encoded date value plus nanoseconds of
 * day, into a single exact BigDecimal count of nanoseconds measured from
 * absolute day 0. Representing both timestamps on one linear nanosecond
 * scale lets them be interpolated with plain decimal arithmetic.
 *
 * @param dateValue the encoded date value (as used by DateTimeUtils)
 * @param timeNanos nanoseconds within the day
 * @return the total nanosecond count as an exact BigDecimal
 */
private static BigDecimal timestampToDecimal(long dateValue, long timeNanos) {
return new BigDecimal(BigInteger.valueOf(DateTimeUtils.absoluteDayFromDateValue(dateValue))
.multiply(IntervalUtils.NANOS_PER_DAY_BI).add(BigInteger.valueOf(timeNanos)));
}
/**
 * Performs linear interpolation between two decimal values.
 * Computes {@code d0 * (1 - factor) + d1 * factor} exactly, using
 * BigDecimal arithmetic so no precision is lost for large values.
 *
 * @param d0 the lower value (weight {@code 1 - factor})
 * @param d1 the upper value (weight {@code factor})
 * @param factor interpolation factor in the range [0, 1]
 * @return the interpolated value
 */
private static BigDecimal interpolateDecimal(BigDecimal d0, BigDecimal d1, double factor) {
BigDecimal upperWeight = BigDecimal.valueOf(factor);
BigDecimal lowerWeight = BigDecimal.valueOf(1 - factor);
return d1.multiply(upperWeight).add(d0.multiply(lowerWeight));
}
// Utility class holding only static methods; private constructor
// prevents instantiation.
private Percentile() {
}
......
......@@ -573,6 +573,12 @@ insert into test values ('2000-01-20 20:00:00'), ('2000-01-21 20:00:00');
select median(v) from test;
>> 2000-01-21 08:00:00
insert into test values ('-2000-01-10 10:00:00'), ('-2000-01-10 10:00:01');
> update count: 2
select percentile_cont(0.16) within group (order by v) from test;
>> -2000-01-10 10:00:00.48
drop table test;
> ok
......@@ -609,6 +615,15 @@ insert into test values ('2000-01-20 20:00:00+10:15'), ('2000-01-21 20:00:00-09'
select median(v) from test;
>> 2000-01-21 08:00:30+00:37
delete from test;
> update count: 2
insert into test values ('-2000-01-20 20:00:00+10:15'), ('-2000-01-21 20:00:00-09');
> update count: 2
select median(v) from test;
>> -2000-01-21 08:00:30+00:37
drop table test;
> ok
......@@ -781,6 +796,20 @@ insert into test values (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7),
(2, 10), (2, 20), (2, 30), (2, 100);
> update count: 14
select
percentile_cont(0.05) within group (order by v) c05a,
percentile_cont(0.05) within group (order by v desc) c05d,
percentile_cont(0.5) within group (order by v) c50,
percentile_cont(0.5) within group (order by v desc) c50d,
percentile_cont(0.95) within group (order by v) c95a,
percentile_cont(0.95) within group (order by v desc) c95d,
g from test group by g;
> C05A C05D C50 C50D C95A C95D G
> ---- ---- --- ---- ---- ---- -
> 1 9 5 5 9 1 1
> 11 89 25 25 89 11 2
> rows: 2
select
percentile_disc(0.05) within group (order by v) d05a,
percentile_disc(0.05) within group (order by v desc) d05d,
......
......@@ -806,4 +806,4 @@ econd irst bcef ordinality nord unnest
analyst occupation distributive josaph aor engineer sajeewa isuru randil kevin doctor businessman artist ashan
corrupts splitted disruption unintentional octets preconditions predicates subq objectweb insn opcodes
preserves masking holder unboxing avert iae transformed subtle reevaluate exclusions subclause ftbl rgr
presorted inclusion contexts aax mwd percentile interpolate mwa
presorted inclusion contexts aax mwd percentile cont interpolate mwa
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论