Unverified 提交 b79444bd authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov 提交者: GitHub

Merge pull request #1463 from katzyn/aggregate

Fix some window aggregates and reduce amount of collecting implementations
......@@ -12,6 +12,7 @@ import java.util.HashMap;
import org.h2.api.ErrorCode;
import org.h2.command.dml.Select;
import org.h2.command.dml.SelectOrderBy;
import org.h2.engine.Database;
import org.h2.engine.Session;
import org.h2.expression.Expression;
import org.h2.expression.ExpressionColumn;
......@@ -191,6 +192,9 @@ public class Aggregate extends AbstractAggregate {
*/
public Aggregate(AggregateType type, Expression on, Select select, boolean distinct) {
    super(select, distinct);
    // DISTINCT combined with COUNT_ALL is not a supported combination;
    // reaching this point with both set is reported as an internal error.
    if (type == AggregateType.COUNT_ALL && distinct) {
        throw DbException.throwInternalError();
    }
    this.on = on;
    this.type = type;
}
......@@ -302,7 +306,7 @@ public class Aggregate extends AbstractAggregate {
v = updateCollecting(session, v, remembered);
}
}
data.add(session.getDatabase(), dataType, distinct, v);
data.add(session.getDatabase(), dataType, v);
}
@Override
......@@ -369,7 +373,7 @@ public class Aggregate extends AbstractAggregate {
@Override
protected Object createAggregateData() {
return AggregateData.create(type);
return AggregateData.create(type, distinct);
}
@Override
......@@ -402,7 +406,7 @@ public class Aggregate extends AbstractAggregate {
return v;
}
case MEDIAN:
return AggregateDataMedian.getResultFromIndex(session, on, dataType);
return AggregateMedian.medianFromIndex(session, on, dataType);
case ENVELOPE:
return ((MVSpatialIndex) AggregateDataEnvelope.getGeometryColumnIndex(on)).getBounds(session);
default:
......@@ -417,6 +421,30 @@ public class Aggregate extends AbstractAggregate {
data = (AggregateData) createAggregateData();
}
switch (type) {
case COUNT:
if (distinct) {
return ValueLong.get(((AggregateDataCollecting) data).getCount());
}
break;
case SUM:
case AVG:
case STDDEV_POP:
case STDDEV_SAMP:
case VAR_POP:
case VAR_SAMP:
if (distinct) {
AggregateDataCollecting c = ((AggregateDataCollecting) data);
if (c.getCount() == 0) {
return ValueNull.INSTANCE;
}
AggregateDataDefault d = new AggregateDataDefault(type);
Database db = session.getDatabase();
for (Value v : c) {
d.add(db, dataType, v);
}
return d.getValue(db, dataType);
}
break;
case GROUP_CONCAT: {
Value[] array = ((AggregateDataCollecting) data).getArray();
if (array == null) {
......@@ -459,6 +487,13 @@ public class Aggregate extends AbstractAggregate {
}
return ValueArray.get(array);
}
case MEDIAN: {
Value[] array = ((AggregateDataCollecting) data).getArray();
if (array == null) {
return ValueNull.INSTANCE;
}
return AggregateMedian.median(session.getDatabase(), array, dataType);
}
case MODE:
if (orderByList != null) {
return ((AggregateDataMode) data).getOrderedValue(session.getDatabase(), dataType,
......@@ -466,8 +501,9 @@ public class Aggregate extends AbstractAggregate {
}
//$FALL-THROUGH$
default:
return data.getValue(session.getDatabase(), dataType, distinct);
// Avoid compiler warning
}
return data.getValue(session.getDatabase(), dataType);
}
@Override
......@@ -762,7 +798,7 @@ public class Aggregate extends AbstractAggregate {
if (distinct) {
return false;
}
return AggregateDataMedian.getMedianColumnIndex(on) != null;
return AggregateMedian.getMedianColumnIndex(on) != null;
case ENVELOPE:
return AggregateDataEnvelope.getGeometryColumnIndex(on) != null;
default:
......
......@@ -7,6 +7,7 @@ package org.h2.expression.aggregate;
import org.h2.engine.Database;
import org.h2.expression.aggregate.Aggregate.AggregateType;
import org.h2.message.DbException;
import org.h2.value.Value;
/**
......@@ -18,30 +19,51 @@ abstract class AggregateData {
* Create an AggregateData object of the correct sub-type.
*
* @param aggregateType the type of the aggregate operation
* @param distinct if the calculation should be distinct
* @return the aggregate data object of the specified type
*/
static AggregateData create(AggregateType aggregateType) {
static AggregateData create(AggregateType aggregateType, boolean distinct) {
switch (aggregateType) {
case SELECTIVITY:
return new AggregateDataSelectivity();
case GROUP_CONCAT:
case ARRAY_AGG:
return new AggregateDataCollecting();
case COUNT_ALL:
return new AggregateDataCountAll();
case COUNT:
if (!distinct) {
return new AggregateDataCount();
case HISTOGRAM:
return new AggregateDataHistogram();
}
break;
case GROUP_CONCAT:
case ARRAY_AGG:
case MEDIAN:
return new AggregateDataMedian();
break;
case MIN:
case MAX:
case BIT_OR:
case BIT_AND:
case BOOL_OR:
case BOOL_AND:
return new AggregateDataDefault(aggregateType);
case SUM:
case AVG:
case STDDEV_POP:
case STDDEV_SAMP:
case VAR_POP:
case VAR_SAMP:
if (!distinct) {
return new AggregateDataDefault(aggregateType);
}
break;
case SELECTIVITY:
return new AggregateDataSelectivity(distinct);
case HISTOGRAM:
return new AggregateDataHistogram(distinct);
case MODE:
return new AggregateDataMode();
case ENVELOPE:
return new AggregateDataEnvelope();
default:
return new AggregateDataDefault(aggregateType);
throw DbException.throwInternalError("type=" + aggregateType);
}
return new AggregateDataCollecting(distinct);
}
/**
......@@ -49,18 +71,16 @@ abstract class AggregateData {
*
* @param database the database
* @param dataType the datatype of the computed result
* @param distinct if the calculation should be distinct
* @param v the value
*/
abstract void add(Database database, int dataType, boolean distinct, Value v);
abstract void add(Database database, int dataType, Value v);
/**
* Get the aggregate result.
*
* @param database the database
* @param dataType the datatype of the computed result
* @param distinct if distinct is used
* @return the value
*/
abstract Value getValue(Database database, int dataType, boolean distinct);
abstract Value getValue(Database database, int dataType);
}
......@@ -7,7 +7,9 @@ package org.h2.expression.aggregate;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import org.h2.engine.Database;
import org.h2.value.Value;
......@@ -15,20 +17,31 @@ import org.h2.value.ValueNull;
/**
* Data stored while calculating an aggregate that needs collecting of all
* values.
* values or a distinct aggregate.
*
* <p>
* NULL values are not collected. {@link #getValue(Database, int, boolean)}
* NULL values are not collected. {@link #getValue(Database, int)}
* method returns {@code null}. Use {@link #getArray()} for instances of this
* class instead. Notice that subclasses like {@link AggregateDataMedian} may
* override {@link #getValue(Database, int, boolean)} to return useful result.
* class instead.
* </p>
*/
class AggregateDataCollecting extends AggregateData {
class AggregateDataCollecting extends AggregateData implements Iterable<Value> {
private final boolean distinct;
Collection<Value> values;
/**
 * Creates a new instance of data for collecting aggregates.
 *
 * @param distinct {@code true} if DISTINCT is used; the flag is stored in
 *                 the {@code distinct} field of this instance
 */
AggregateDataCollecting(boolean distinct) {
    this.distinct = distinct;
}
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
void add(Database database, int dataType, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
......@@ -40,10 +53,19 @@ class AggregateDataCollecting extends AggregateData {
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Value getValue(Database database, int dataType) {
return null;
}
/**
* Returns the count of values.
*
* @return the count of values
*/
int getCount() {
    // No value has been stored while the backing collection is still unset.
    if (values == null) {
        return 0;
    }
    return values.size();
}
/**
* Returns array with values or {@code null}.
*
......@@ -56,4 +78,10 @@ class AggregateDataCollecting extends AggregateData {
}
return values.toArray(new Value[0]);
}
@Override
public Iterator<Value> iterator() {
    // With no backing collection there is nothing to iterate over, so an
    // empty iterator is handed out instead of dereferencing null.
    if (values == null) {
        return Collections.<Value>emptyIterator();
    }
    return values.iterator();
}
}
......@@ -6,7 +6,6 @@
package org.h2.expression.aggregate;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
......@@ -16,31 +15,16 @@ import org.h2.value.ValueNull;
*/
class AggregateDataCount extends AggregateData {
private long count;
private ValueHashMap<AggregateDataCount> distinctValues;
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
void add(Database database, int dataType, Value v) {
if (v != ValueNull.INSTANCE) {
count++;
if (distinct) {
if (distinctValues == null) {
distinctValues = ValueHashMap.newInstance();
}
distinctValues.put(v, this);
}
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
if (distinct) {
if (distinctValues != null) {
count = distinctValues.size();
} else {
count = 0;
}
}
Value getValue(Database database, int dataType) {
return ValueLong.get(count).convertTo(dataType);
}
......
......@@ -6,7 +6,6 @@
package org.h2.expression.aggregate;
import org.h2.engine.Database;
import org.h2.message.DbException;
import org.h2.value.Value;
import org.h2.value.ValueLong;
......@@ -14,21 +13,16 @@ import org.h2.value.ValueLong;
* Data stored while calculating a COUNT(*) aggregate.
*/
class AggregateDataCountAll extends AggregateData {
private long count;
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
if (distinct) {
throw DbException.throwInternalError();
}
void add(Database database, int dataType, Value v) {
count++;
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
if (distinct) {
throw DbException.throwInternalError();
}
Value getValue(Database database, int dataType) {
return ValueLong.get(count).convertTo(dataType);
}
......
......@@ -8,7 +8,6 @@ package org.h2.expression.aggregate;
import org.h2.engine.Database;
import org.h2.expression.aggregate.Aggregate.AggregateType;
import org.h2.message.DbException;
import org.h2.util.ValueHashMap;
import org.h2.value.DataType;
import org.h2.value.Value;
import org.h2.value.ValueBoolean;
......@@ -20,9 +19,9 @@ import org.h2.value.ValueNull;
* Data stored while calculating an aggregate.
*/
class AggregateDataDefault extends AggregateData {
private final AggregateType aggregateType;
private long count;
private ValueHashMap<AggregateDataDefault> distinctValues;
private Value value;
private double m2, mean;
......@@ -34,18 +33,11 @@ class AggregateDataDefault extends AggregateData {
}
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
void add(Database database, int dataType, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
count++;
if (distinct) {
if (distinctValues == null) {
distinctValues = ValueHashMap.newInstance();
}
distinctValues.put(v, this);
return;
}
switch (aggregateType) {
case SUM:
if (value == null) {
......@@ -127,11 +119,7 @@ class AggregateDataDefault extends AggregateData {
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
if (distinct) {
count = 0;
groupDistinct(database, dataType);
}
Value getValue(Database database, int dataType) {
Value v = null;
switch (aggregateType) {
case SUM:
......@@ -192,14 +180,4 @@ class AggregateDataDefault extends AggregateData {
return a;
}
private void groupDistinct(Database database, int dataType) {
if (distinctValues == null) {
return;
}
count = 0;
for (Value v : distinctValues.keys()) {
add(database, dataType, false, v);
}
}
}
......@@ -57,7 +57,7 @@ class AggregateDataEnvelope extends AggregateData {
}
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
void add(Database database, int dataType, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
......@@ -65,7 +65,7 @@ class AggregateDataEnvelope extends AggregateData {
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Value getValue(Database database, int dataType) {
return ValueGeometry.fromEnvelope(envelope);
}
......
......@@ -22,10 +22,21 @@ import org.h2.value.ValueLong;
*/
class AggregateDataHistogram extends AggregateData {
private final boolean distinct;
private ValueHashMap<LongDataCounter> distinctValues;
/**
 * Creates a new instance of data for the HISTOGRAM aggregate.
 *
 * @param distinct {@code true} if DISTINCT is used; the flag is stored in
 *                 the {@code distinct} field of this instance
 */
AggregateDataHistogram(boolean distinct) {
    this.distinct = distinct;
}
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
void add(Database database, int dataType, Value v) {
if (distinctValues == null) {
distinctValues = ValueHashMap.newInstance();
}
......@@ -41,7 +52,7 @@ class AggregateDataHistogram extends AggregateData {
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Value getValue(Database database, int dataType) {
if (distinctValues == null) {
return ValueArray.get(new Value[0]).convertTo(dataType);
}
......
......@@ -20,7 +20,7 @@ class AggregateDataMode extends AggregateData {
private ValueHashMap<LongDataCounter> distinctValues;
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
void add(Database database, int dataType, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
......@@ -36,7 +36,7 @@ class AggregateDataMode extends AggregateData {
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Value getValue(Database database, int dataType) {
Value v = ValueNull.INSTANCE;
if (distinctValues != null) {
long count = 0L;
......
......@@ -15,12 +15,24 @@ import org.h2.value.ValueInt;
* Data stored while calculating a SELECTIVITY aggregate.
*/
class AggregateDataSelectivity extends AggregateData {
private final boolean distinct;
private long count;
private IntIntHashMap distinctHashes;
private double m2;
/**
 * Creates a new instance of data for the SELECTIVITY aggregate.
 *
 * @param distinct {@code true} if DISTINCT is used; the flag is stored in
 *                 the {@code distinct} field of this instance
 */
AggregateDataSelectivity(boolean distinct) {
    this.distinct = distinct;
}
@Override
void add(Database database, int dataType, boolean distinct, Value v) {
void add(Database database, int dataType, Value v) {
count++;
if (distinctHashes == null) {
distinctHashes = new IntIntHashMap();
......@@ -36,7 +48,7 @@ class AggregateDataSelectivity extends AggregateData {
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Value getValue(Database database, int dataType) {
if (distinct) {
count = 0;
}
......@@ -53,4 +65,5 @@ class AggregateDataSelectivity extends AggregateData {
v = ValueInt.get(s);
return v.convertTo(dataType);
}
}
......@@ -41,9 +41,10 @@ import org.h2.value.ValueTimestamp;
import org.h2.value.ValueTimestampTimeZone;
/**
* Data stored while calculating a MEDIAN aggregate.
* MEDIAN aggregate.
*/
class AggregateDataMedian extends AggregateDataCollecting {
final class AggregateMedian {
private static boolean isNullsLast(Index index) {
IndexColumn ic = index.getIndexColumns()[0];
int sortType = ic.sortType;
......@@ -91,14 +92,34 @@ class AggregateDataMedian extends AggregateDataCollecting {
}
/**
* Get the result from the index.
* Get the median from the array of values.
*
* @param database the database
* @param array array with values
* @param dataType the data type
* @return the result
*/
static Value median(Database database, Value[] array, int dataType) {
    final CompareMode ordering = database.getCompareMode();
    // Note: the passed array itself is sorted in place.
    Arrays.sort(array, ordering);
    final int size = array.length;
    final int middle = size / 2;
    final Value upper = array[middle];
    if ((size & 1) == 0) {
        // Even number of values: combine the two values around the middle.
        return getMedian(array[middle - 1], upper, dataType, database.getMode(), ordering);
    }
    // Odd number of values: the middle element is the median.
    return upper.convertTo(dataType);
}
/**
* Get the median from the index.
*
* @param session the session
* @param on the expression
* @param dataType the data type
* @return the result
*/
static Value getResultFromIndex(Session session, Expression on, int dataType) {
static Value medianFromIndex(Session session, Expression on, int dataType) {
Index index = getMedianColumnIndex(on);
long count = index.getRowCount(session);
if (count == 0) {
......@@ -172,25 +193,9 @@ class AggregateDataMedian extends AggregateDataCollecting {
return v;
}
@Override
Value getValue(Database database, int dataType, boolean distinct) {
Value[] a = getArray();
if (a == null) {
return ValueNull.INSTANCE;
}
final CompareMode compareMode = database.getCompareMode();
Arrays.sort(a, compareMode);
int len = a.length;
int idx = len / 2;
Value v1 = a[idx];
if ((len & 1) == 1) {
return v1.convertTo(dataType);
}
return getMedian(a[idx - 1], v1, dataType, database.getMode(), compareMode);
}
private static Value getMedian(Value v0, Value v1, int dataType, Mode databaseMode, CompareMode compareMode) {
if (v0.compareTo(v1, databaseMode, compareMode) == 0) {
int cmp = v0.compareTo(v1, databaseMode, compareMode);
if (cmp == 0) {
return v0.convertTo(dataType);
}
switch (dataType) {
......@@ -268,9 +273,12 @@ class AggregateDataMedian extends AggregateDataCollecting {
IntervalUtils.intervalToAbsolute((ValueInterval) v0)
.add(IntervalUtils.intervalToAbsolute((ValueInterval) v1)).shiftRight(1));
default:
// Just return first
return v0.convertTo(dataType);
// Just return smaller
return (cmp < 0 ? v0 : v1).convertTo(dataType);
}
}
private AggregateMedian() {
    // Private and intentionally empty: keeps code outside this class from
    // creating instances of it.
}
}
......@@ -209,7 +209,7 @@ public class JavaAggregate extends AbstractAggregate {
arg = arg.convertTo(argTypes[i]);
argValues[i] = arg;
}
data.add(session.getDatabase(), dataType, true, args.length == 1 ? arg : ValueArray.get(argValues));
data.add(session.getDatabase(), dataType, args.length == 1 ? arg : ValueArray.get(argValues));
} else {
Aggregate agg = (Aggregate) aggregateData;
Object[] argValues = new Object[args.length];
......@@ -254,7 +254,7 @@ public class JavaAggregate extends AbstractAggregate {
@Override
protected Object createAggregateData() {
return distinct ? new AggregateDataCollecting() : getInstance();
return distinct ? new AggregateDataCollecting(true) : getInstance();
}
}
......@@ -69,3 +69,17 @@ SELECT NR FROM (SELECT COUNT(ID) OVER (ORDER BY NAME) AS NR,
DROP TABLE TEST;
> ok
SELECT I, V, COUNT(V) OVER W C, COUNT(DISTINCT V) OVER W D FROM
VALUES (1, 1), (2, 1), (3, 1), (4, 1), (5, 2), (6, 2), (7, 3) T(I, V)
WINDOW W AS (ORDER BY I);
> I V C D
> - - - -
> 1 1 1 1
> 2 1 2 1
> 3 1 3 1
> 4 1 4 1
> 5 2 5 2
> 6 2 6 2
> 7 3 7 3
> rows (ordered): 7
......@@ -81,3 +81,17 @@ SELECT
> 7 28 28 15 36
> 8 36 36 8 36
> rows (ordered): 8
SELECT I, V, SUM(V) OVER W S, SUM(DISTINCT V) OVER W D FROM
VALUES (1, 1), (2, 1), (3, 1), (4, 1), (5, 2), (6, 2), (7, 3) T(I, V)
WINDOW W AS (ORDER BY I);
> I V S D
> - - -- -
> 1 1 1 1
> 2 1 2 1
> 3 1 3 1
> 4 1 4 1
> 5 2 6 3
> 6 2 8 3
> 7 3 11 6
> rows (ordered): 7
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论