提交 fe819dba authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov

Add MEDIAN aggregate

上级 88ad5a36
......@@ -123,7 +123,12 @@ public class Aggregate extends Expression {
/**
* The aggregate type for HISTOGRAM(expression).
*/
HISTOGRAM
HISTOGRAM,
/**
* The aggregate type for MEDIAN(expression).
*/
MEDIAN
}
private static final HashMap<String, AggregateType> AGGREGATES = new HashMap<>(24);
......@@ -187,6 +192,7 @@ public class Aggregate extends Expression {
addAggregate("HISTOGRAM", AggregateType.HISTOGRAM);
addAggregate("BIT_OR", AggregateType.BIT_OR);
addAggregate("BIT_AND", AggregateType.BIT_AND);
addAggregate("MEDIAN", AggregateType.MEDIAN);
}
private static void addAggregate(String name, AggregateType type) {
......@@ -434,6 +440,7 @@ public class Aggregate extends Expression {
break;
case MIN:
case MAX:
case MEDIAN:
break;
case STDDEV_POP:
case STDDEV_SAMP:
......@@ -568,6 +575,9 @@ public class Aggregate extends Expression {
case BIT_OR:
text = "BIT_OR";
break;
case MEDIAN:
text = "MEDIAN";
break;
default:
throw DbException.throwInternalError("type=" + type);
}
......
......@@ -31,6 +31,8 @@ abstract class AggregateData {
return new AggregateDataCount();
} else if (aggregateType == AggregateType.HISTOGRAM) {
return new AggregateDataHistogram();
} else if (aggregateType == AggregateType.MEDIAN) {
return new AggregateDataMedian();
} else {
return new AggregateDataDefault(aggregateType);
}
......
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import org.h2.engine.Database;
import org.h2.util.DateTimeUtils;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueDate;
import org.h2.value.ValueDecimal;
import org.h2.value.ValueDouble;
import org.h2.value.ValueFloat;
import org.h2.value.ValueInt;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
import org.h2.value.ValueTime;
import org.h2.value.ValueTimestamp;
import org.h2.value.ValueTimestampTimeZone;
/**
* Data stored while calculating a MEDIAN aggregate.
*/
class AggregateDataMedian extends AggregateData {
private Collection<Value> values;
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
Collection<Value> c = values;
if (c == null) {
values = c = distinct ? new HashSet<Value>() : new ArrayList<Value>();
}
c.add(v);
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Collection<Value> c = values;
if (c == null) {
return ValueNull.INSTANCE;
}
if (distinct && c instanceof ArrayList) {
c = new HashSet<>(c);
}
Value[] a = c.toArray(new Value[0]);
final CompareMode mode = database.getCompareMode();
Arrays.sort(a, new Comparator<Value>() {
@Override
public int compare(Value o1, Value o2) {
return o1.compareTo(o2, mode);
}
});
int len = a.length;
int idx = len / 2;
Value v1 = a[idx];
if ((len & 1) == 1) {
return v1.convertTo(dataType);
}
Value v0 = a[idx - 1];
if (v0.compareTo(v1, mode) == 0) {
return v1.convertTo(dataType);
}
switch (dataType) {
case Value.BYTE:
case Value.SHORT:
case Value.INT:
return ValueInt.get((v0.getInt() + v1.getInt()) / 2).convertTo(dataType);
case Value.LONG:
return ValueLong.get((v0.getLong() + v1.getLong()) / 2);
case Value.DECIMAL:
return ValueDecimal.get(v0.getBigDecimal().add(v1.getBigDecimal()).divide(BigDecimal.valueOf(2)));
case Value.FLOAT:
return ValueFloat.get((v0.getFloat() + v1.getFloat()) / 2);
case Value.DOUBLE:
return ValueDouble.get((v0.getFloat() + v1.getDouble()) / 2);
case Value.TIME: {
return ValueTime.fromMillis((v0.getTime().getTime() + v1.getTime().getTime()) / 2);
}
case Value.DATE: {
ValueDate d0 = (ValueDate) v0.convertTo(Value.DATE), d1 = (ValueDate) v1.convertTo(Value.DATE);
return ValueDate.fromDateValue(
DateTimeUtils.dateValueFromAbsoluteDay((DateTimeUtils.absoluteDayFromDateValue(d0.getDateValue())
+ DateTimeUtils.absoluteDayFromDateValue(d1.getDateValue())) / 2));
}
case Value.TIMESTAMP: {
ValueTimestamp ts0 = (ValueTimestamp) v0.convertTo(Value.TIMESTAMP),
ts1 = (ValueTimestamp) v1.convertTo(Value.TIMESTAMP);
return DateTimeUtils.normalizeTimestamp(
(DateTimeUtils.absoluteDayFromDateValue(ts0.getDateValue())
+ DateTimeUtils.absoluteDayFromDateValue(ts1.getDateValue())) / 2,
(ts0.getTimeNanos() + ts1.getTimeNanos()) / 2);
}
case Value.TIMESTAMP_TZ: {
ValueTimestampTimeZone ts0 = (ValueTimestampTimeZone) v0.convertTo(Value.TIMESTAMP_TZ),
ts1 = (ValueTimestampTimeZone) v1.convertTo(Value.TIMESTAMP_TZ);
ValueTimestamp ts = DateTimeUtils.normalizeTimestamp(
(DateTimeUtils.absoluteDayFromDateValue(ts0.getDateValue())
+ DateTimeUtils.absoluteDayFromDateValue(ts1.getDateValue())) / 2,
(ts0.getTimeNanos() + ts1.getTimeNanos()) / 2);
return ValueTimestampTimeZone.fromDateValueAndNanos(ts.getDateValue(), ts.getTimeNanos(),
(short) ((ts0.getTimeZoneOffsetMins() + ts1.getTimeZoneOffsetMins()) / 2));
}
default:
// Just return first
return v0.convertTo(dataType);
}
}
}
......@@ -99,7 +99,7 @@ public class TestScript extends TestBase {
testScript("datatypes/" + s + ".sql");
}
for (String s : new String[] { "avg", "bit-and", "bit-or", "count",
"group-concat", "max", "min", "selectivity", "stddev-pop",
"group-concat", "max", "median", "min", "selectivity", "stddev-pop",
"stddev-samp", "sum", "var-pop", "var-samp" }) {
testScript("functions/aggregate/" + s + ".sql");
}
......
-- Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
-- and the EPL 1.0 (http://h2database.com/html/license.html).
-- Initial Developer: H2 Group
--
create table test(v tinyint);
> ok
insert into test values (20), (20), (10);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 15
insert into test values (10);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 15
drop table test;
> ok
create table test(v smallint);
> ok
insert into test values (20), (20), (10);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 15
insert into test values (10);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 15
drop table test;
> ok
create table test(v int);
> ok
insert into test values (20), (20), (10);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 15
insert into test values (10);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 15
drop table test;
> ok
create table test(v bigint);
> ok
insert into test values (20), (20), (10);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 20
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 15
insert into test values (10);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 15
drop table test;
> ok
create table test(v real);
> ok
insert into test values (2), (2), (1);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 2.0
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 2.0
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 1.5
insert into test values (1);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 1.5
drop table test;
> ok
create table test(v double);
> ok
insert into test values (2), (2), (1);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 2.0
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 2.0
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 1.5
insert into test values (1);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 1.5
drop table test;
> ok
create table test(v numeric(1));
> ok
insert into test values (2), (2), (1);
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 2
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 2
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 1.5
insert into test values (1);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 1.5
drop table test;
> ok
create table test(v time);
> ok
insert into test values ('20:00:00'), ('20:00:00'), ('10:00:00');
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------
> 20:00:00
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 20:00:00
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 15:00:00
insert into test values ('10:00:00');
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------
> 15:00:00
drop table test;
> ok
create table test(v date);
> ok
insert into test values ('2000-01-20'), ('2000-01-20'), ('2000-01-10');
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ----------
> 2000-01-20
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ----------
> 2000-01-20
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------
> 2000-01-15
insert into test values ('2000-01-10');
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ----------
> 2000-01-15
drop table test;
> ok
create table test(v timestamp);
> ok
insert into test values ('2000-01-20 20:00:00'), ('2000-01-20 20:00:00'), ('2000-01-10 10:00:00');
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ---------------------
> 2000-01-20 20:00:00.0
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------------------
> 2000-01-20 20:00:00.0
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ---------------------
> 2000-01-15 15:00:00.0
insert into test values ('2000-01-10 10:00:00');
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ---------------------
> 2000-01-15 15:00:00.0
drop table test;
> ok
create table test(v timestamp with time zone);
> ok
insert into test values ('2000-01-20 20:00:00+04'), ('2000-01-20 20:00:00+04'), ('2000-01-10 10:00:00+02');
> update count: 3
select median(v) from test;
> MEDIAN(V)
> ------------------------
> 2000-01-20 20:00:00.0+04
insert into test values (null);
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ------------------------
> 2000-01-20 20:00:00.0+04
select median(distinct v) from test;
> MEDIAN(DISTINCT V)
> ------------------------
> 2000-01-15 15:00:00.0+03
insert into test values ('2000-01-10 10:00:00+02');
> update count: 1
select median(v) from test;
> MEDIAN(V)
> ------------------------
> 2000-01-15 15:00:00.0+03
drop table test;
> ok
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论