提交 6d3ad065 authored 作者: noelgrandin's avatar noelgrandin

split up the AggregateData computation into different classes.

The purpose of this is to simplify the code for the individual cases to make it easier to implement analytic (windowing) functions.
上级 39ec60ca
......@@ -240,7 +240,7 @@ public class Aggregate extends Expression {
AggregateData data = (AggregateData) group.get(this);
if (data == null) {
data = new AggregateData(type, dataType);
data = AggregateData.create(type, dataType);
group.put(this, data);
}
Value v = on == null ? null : on.getValue(session);
......@@ -297,11 +297,11 @@ public class Aggregate extends Expression {
}
AggregateData data = (AggregateData) group.get(this);
if (data == null) {
data = new AggregateData(type, dataType);
data = AggregateData.create(type, dataType);
}
Value v = data.getValue(session.getDatabase(), distinct);
if (type == GROUP_CONCAT) {
ArrayList<Value> list = data.getList();
ArrayList<Value> list = ((AggregateDataGroupConcat)data).getList();
if (list == null || list.size() == 0) {
return ValueNull.INSTANCE;
}
......
......@@ -6,41 +6,34 @@
*/
package org.h2.expression;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.message.DbException;
import org.h2.util.IntIntHashMap;
import org.h2.util.New;
import org.h2.util.ValueHashMap;
import org.h2.value.CompareMode;
import org.h2.value.DataType;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueBoolean;
import org.h2.value.ValueDouble;
import org.h2.value.ValueInt;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
/**
* Data stored while calculating an aggregate.
* Abstract class for the computation of an aggregate.
*/
class AggregateData {
private final int aggregateType;
private final int dataType;
private long count;
private IntIntHashMap distinctHashes;
private ValueHashMap<AggregateData> distinctValues;
private Value value;
private double m2, mean;
private ArrayList<Value> list;
AggregateData(int aggregateType, int dataType) {
this.aggregateType = aggregateType;
this.dataType = dataType;
abstract class AggregateData {
/**
 * Create the AggregateData implementation that matches an aggregate type.
 * Aggregate types without a dedicated implementation are handled by
 * AggregateDataDefault.
 *
 * @param aggregateType the type of the aggregate operation
 * @param dataType the datatype of the computed result
 * @return a new, empty aggregate data object
 */
static AggregateData create(int aggregateType, int dataType) {
    switch (aggregateType) {
    case Aggregate.SELECTIVITY:
        return new AggregateDataSelectivity(dataType);
    case Aggregate.GROUP_CONCAT:
        return new AggregateDataGroupConcat();
    case Aggregate.COUNT_ALL:
        return new AggregateDataCountAll(dataType);
    case Aggregate.COUNT:
        return new AggregateDataCount(dataType);
    case Aggregate.HISTOGRAM:
        return new AggregateDataHistogram(dataType);
    default:
        return new AggregateDataDefault(aggregateType, dataType);
    }
}
/**
......@@ -50,131 +43,8 @@ class AggregateData {
* @param distinct if the calculation should be distinct
* @param v the value
*/
void add(Database database, boolean distinct, Value v) {
if (aggregateType == Aggregate.SELECTIVITY) {
count++;
if (distinctHashes == null) {
distinctHashes = new IntIntHashMap();
}
int size = distinctHashes.size();
if (size > Constants.SELECTIVITY_DISTINCT_COUNT) {
distinctHashes = new IntIntHashMap();
m2 += size;
}
int hash = v.hashCode();
// the value -1 is not supported
distinctHashes.put(hash, 1);
return;
} else if (aggregateType == Aggregate.COUNT_ALL) {
count++;
return;
} else if (aggregateType == Aggregate.HISTOGRAM) {
if (distinctValues == null) {
distinctValues = ValueHashMap.newInstance();
}
AggregateData a = distinctValues.get(v);
if (a == null) {
if (distinctValues.size() < Constants.SELECTIVITY_DISTINCT_COUNT) {
a = new AggregateData(Aggregate.HISTOGRAM, dataType);
distinctValues.put(v, a);
}
}
if (a != null) {
a.count++;
}
return;
}
if (v == ValueNull.INSTANCE) {
return;
}
count++;
if (distinct) {
if (distinctValues == null) {
distinctValues = ValueHashMap.newInstance();
}
distinctValues.put(v, this);
return;
}
switch (aggregateType) {
case Aggregate.COUNT:
case Aggregate.HISTOGRAM:
return;
case Aggregate.SUM:
if (value == null) {
value = v.convertTo(dataType);
} else {
v = v.convertTo(value.getType());
value = value.add(v);
}
break;
case Aggregate.AVG:
if (value == null) {
value = v.convertTo(DataType.getAddProofType(dataType));
} else {
v = v.convertTo(value.getType());
value = value.add(v);
}
break;
case Aggregate.MIN:
if (value == null || database.compare(v, value) < 0) {
value = v;
}
break;
case Aggregate.MAX:
if (value == null || database.compare(v, value) > 0) {
value = v;
}
break;
case Aggregate.GROUP_CONCAT: {
if (list == null) {
list = New.arrayList();
}
list.add(v);
break;
}
case Aggregate.STDDEV_POP:
case Aggregate.STDDEV_SAMP:
case Aggregate.VAR_POP:
case Aggregate.VAR_SAMP: {
// Using Welford's method, see also
// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
// http://www.johndcook.com/standard_deviation.html
double x = v.getDouble();
if (count == 1) {
mean = x;
m2 = 0;
} else {
double delta = x - mean;
mean += delta / count;
m2 += delta * (x - mean);
}
break;
}
case Aggregate.BOOL_AND:
v = v.convertTo(Value.BOOLEAN);
if (value == null) {
value = v;
} else {
value = ValueBoolean.get(value.getBoolean().booleanValue() && v.getBoolean().booleanValue());
}
break;
case Aggregate.BOOL_OR:
v = v.convertTo(Value.BOOLEAN);
if (value == null) {
value = v;
} else {
value = ValueBoolean.get(value.getBoolean().booleanValue() || v.getBoolean().booleanValue());
}
break;
default:
DbException.throwInternalError("type=" + aggregateType);
}
}
ArrayList<Value> getList() {
return list;
}
abstract void add(Database database, boolean distinct, Value v);
/**
* Get the aggregate result.
*
......@@ -182,119 +52,5 @@ class AggregateData {
* @param distinct if distinct is used
* @return the value
*/
Value getValue(Database database, boolean distinct) {
if (distinct) {
count = 0;
groupDistinct(database);
}
Value v = null;
switch (aggregateType) {
case Aggregate.SELECTIVITY: {
int s = 0;
if (count == 0) {
s = 0;
} else {
m2 += distinctHashes.size();
m2 = 100 * m2 / count;
s = (int) m2;
s = s <= 0 ? 1 : s > 100 ? 100 : s;
}
v = ValueInt.get(s);
break;
}
case Aggregate.COUNT:
case Aggregate.COUNT_ALL:
v = ValueLong.get(count);
break;
case Aggregate.SUM:
case Aggregate.MIN:
case Aggregate.MAX:
case Aggregate.BOOL_OR:
case Aggregate.BOOL_AND:
v = value;
break;
case Aggregate.AVG:
if (value != null) {
v = divide(value, count);
}
break;
case Aggregate.GROUP_CONCAT:
return null;
case Aggregate.STDDEV_POP: {
if (count < 1) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(Math.sqrt(m2 / count));
break;
}
case Aggregate.STDDEV_SAMP: {
if (count < 2) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(Math.sqrt(m2 / (count - 1)));
break;
}
case Aggregate.VAR_POP: {
if (count < 1) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(m2 / count);
break;
}
case Aggregate.VAR_SAMP: {
if (count < 2) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(m2 / (count - 1));
break;
}
case Aggregate.HISTOGRAM:
ValueArray[] values = new ValueArray[distinctValues.size()];
int i = 0;
for (Value dv : distinctValues.keys()) {
AggregateData d = distinctValues.get(dv);
values[i] = ValueArray.get(new Value[] {dv, ValueLong.get(d.count)});
i++;
}
final CompareMode compareMode = database.getCompareMode();
Arrays.sort(values, new Comparator<ValueArray>() {
@Override
public int compare(ValueArray v1, ValueArray v2) {
Value a1 = v1.getList()[0];
Value a2 = v2.getList()[0];
return a1.compareTo(a2, compareMode);
}
});
v = ValueArray.get(values);
break;
default:
DbException.throwInternalError("type=" + aggregateType);
}
return v == null ? ValueNull.INSTANCE : v.convertTo(dataType);
}
private static Value divide(Value a, long by) {
if (by == 0) {
return ValueNull.INSTANCE;
}
int type = Value.getHigherOrder(a.getType(), Value.LONG);
Value b = ValueLong.get(by).convertTo(type);
a = a.convertTo(type).divide(b);
return a;
}
private void groupDistinct(Database database) {
if (distinctValues == null) {
return;
}
if (aggregateType == Aggregate.COUNT) {
count = distinctValues.size();
} else {
count = 0;
for (Value v : distinctValues.keys()) {
add(database, false, v);
}
}
}
abstract Value getValue(Database database, boolean distinct);
}
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License, Version
* 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html). Initial Developer: H2 Group
*/
package org.h2.expression;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
/**
 * Data stored while calculating a COUNT(expression) aggregate.
 * NULL values are never counted.
 */
class AggregateDataCount extends AggregateData {

    private final int dataType;
    private long count;
    private ValueHashMap<AggregateDataCount> distinctValues;

    /**
     * @param dataType the datatype of the computed result
     */
    AggregateDataCount(int dataType) {
        this.dataType = dataType;
    }

    /**
     * Count one more value, unless it is NULL. In distinct mode the value
     * is additionally remembered so that duplicates can be collapsed when
     * the result is computed.
     *
     * @param database the database
     * @param distinct if the calculation should be distinct
     * @param v the value
     */
    @Override
    void add(Database database, boolean distinct, Value v) {
        if (v != ValueNull.INSTANCE) {
            count++;
            if (distinct) {
                if (distinctValues == null) {
                    distinctValues = ValueHashMap.newInstance();
                }
                // only the key matters; the mapped object is a placeholder
                distinctValues.put(v, this);
            }
        }
    }

    /**
     * Get the number of counted values, converted to the result data type.
     * In distinct mode this is the number of different values seen.
     *
     * @param database the database
     * @param distinct if distinct is used
     * @return the value
     */
    @Override
    Value getValue(Database database, boolean distinct) {
        if (distinct) {
            count = distinctValues == null ? 0 : distinctValues.size();
        }
        return ValueLong.get(count).convertTo(dataType);
    }
}
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License, Version
* 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html). Initial Developer: H2 Group
*/
package org.h2.expression;
import org.h2.engine.Database;
import org.h2.message.DbException;
import org.h2.value.Value;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
/**
 * Data stored while calculating a COUNT(*) aggregate.
 * Every added row is counted, whatever its value; DISTINCT is not
 * supported and is reported as an internal error.
 */
class AggregateDataCountAll extends AggregateData {

    private final int dataType;
    private long count;

    /**
     * @param dataType the datatype of the computed result
     */
    AggregateDataCountAll(int dataType) {
        this.dataType = dataType;
    }

    /**
     * Count one more row. The value itself is not inspected.
     *
     * @param database the database
     * @param distinct if the calculation should be distinct (must be false)
     * @param v the value
     */
    @Override
    void add(Database database, boolean distinct, Value v) {
        if (distinct) {
            throw DbException.throwInternalError();
        }
        count++;
    }

    /**
     * Get the number of counted rows, converted to the result data type.
     *
     * @param database the database
     * @param distinct if distinct is used (must be false)
     * @return the value
     */
    @Override
    Value getValue(Database database, boolean distinct) {
        if (distinct) {
            throw DbException.throwInternalError();
        }
        // ValueLong.get always yields a value object, so no NULL fallback
        // is needed here (same as in AggregateDataCount)
        return ValueLong.get(count).convertTo(dataType);
    }
}
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression;
import org.h2.engine.Database;
import org.h2.message.DbException;
import org.h2.util.ValueHashMap;
import org.h2.value.DataType;
import org.h2.value.Value;
import org.h2.value.ValueBoolean;
import org.h2.value.ValueDouble;
import org.h2.value.ValueLong;
import org.h2.value.ValueNull;
/**
* Data stored while calculating an aggregate.
*/
class AggregateDataDefault extends AggregateData {
private final int aggregateType;
private final int dataType;
private long count;
private ValueHashMap<AggregateDataDefault> distinctValues;
private Value value;
private double m2, mean;
/**
* @param aggregateType the type of the aggregate operation
* @param dataType the datatype of the computed result
*/
AggregateDataDefault(int aggregateType, int dataType) {
this.aggregateType = aggregateType;
this.dataType = dataType;
}
/**
* Add a value to this aggregate.
*
* @param database the database
* @param distinct if the calculation should be distinct
* @param v the value
*/
@Override
void add(Database database, boolean distinct, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
count++;
if (distinct) {
if (distinctValues == null) {
distinctValues = ValueHashMap.newInstance();
}
distinctValues.put(v, this);
return;
}
switch (aggregateType) {
case Aggregate.SUM:
if (value == null) {
value = v.convertTo(dataType);
} else {
v = v.convertTo(value.getType());
value = value.add(v);
}
break;
case Aggregate.AVG:
if (value == null) {
value = v.convertTo(DataType.getAddProofType(dataType));
} else {
v = v.convertTo(value.getType());
value = value.add(v);
}
break;
case Aggregate.MIN:
if (value == null || database.compare(v, value) < 0) {
value = v;
}
break;
case Aggregate.MAX:
if (value == null || database.compare(v, value) > 0) {
value = v;
}
break;
case Aggregate.STDDEV_POP:
case Aggregate.STDDEV_SAMP:
case Aggregate.VAR_POP:
case Aggregate.VAR_SAMP: {
// Using Welford's method, see also
// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
// http://www.johndcook.com/standard_deviation.html
double x = v.getDouble();
if (count == 1) {
mean = x;
m2 = 0;
} else {
double delta = x - mean;
mean += delta / count;
m2 += delta * (x - mean);
}
break;
}
case Aggregate.BOOL_AND:
v = v.convertTo(Value.BOOLEAN);
if (value == null) {
value = v;
} else {
value = ValueBoolean.get(value.getBoolean().booleanValue() && v.getBoolean().booleanValue());
}
break;
case Aggregate.BOOL_OR:
v = v.convertTo(Value.BOOLEAN);
if (value == null) {
value = v;
} else {
value = ValueBoolean.get(value.getBoolean().booleanValue() || v.getBoolean().booleanValue());
}
break;
default:
DbException.throwInternalError("type=" + aggregateType);
}
}
/**
* Get the aggregate result.
*
* @param database the database
* @param distinct if distinct is used
* @return the value
*/
@Override
Value getValue(Database database, boolean distinct) {
if (distinct) {
count = 0;
groupDistinct(database);
}
Value v = null;
switch (aggregateType) {
case Aggregate.SUM:
case Aggregate.MIN:
case Aggregate.MAX:
case Aggregate.BOOL_OR:
case Aggregate.BOOL_AND:
v = value;
break;
case Aggregate.AVG:
if (value != null) {
v = divide(value, count);
}
break;
case Aggregate.STDDEV_POP: {
if (count < 1) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(Math.sqrt(m2 / count));
break;
}
case Aggregate.STDDEV_SAMP: {
if (count < 2) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(Math.sqrt(m2 / (count - 1)));
break;
}
case Aggregate.VAR_POP: {
if (count < 1) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(m2 / count);
break;
}
case Aggregate.VAR_SAMP: {
if (count < 2) {
return ValueNull.INSTANCE;
}
v = ValueDouble.get(m2 / (count - 1));
break;
}
default:
DbException.throwInternalError("type=" + aggregateType);
}
return v == null ? ValueNull.INSTANCE : v.convertTo(dataType);
}
private static Value divide(Value a, long by) {
if (by == 0) {
return ValueNull.INSTANCE;
}
int type = Value.getHigherOrder(a.getType(), Value.LONG);
Value b = ValueLong.get(by).convertTo(type);
a = a.convertTo(type).divide(b);
return a;
}
private void groupDistinct(Database database) {
if (distinctValues == null) {
return;
}
count = 0;
for (Value v : distinctValues.keys()) {
add(database, false, v);
}
}
}
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License, Version
* 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html). Initial Developer: H2 Group
*/
package org.h2.expression;
import java.util.ArrayList;
import org.h2.engine.Database;
import org.h2.util.New;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueNull;
/**
 * Data stored while calculating a GROUP_CONCAT aggregate.
 * The non-NULL values are collected in a list; building the final result
 * from that list is left to the caller.
 */
class AggregateDataGroupConcat extends AggregateData {

    private ArrayList<Value> list;
    private ValueHashMap<AggregateDataGroupConcat> distinctValues;

    AggregateDataGroupConcat() {
    }

    /**
     * Add a value to this aggregate. NULL values are ignored. In distinct
     * mode the value is only remembered; the list is built when the result
     * is requested.
     *
     * @param database the database
     * @param distinct if the calculation should be distinct
     * @param v the value
     */
    @Override
    void add(Database database, boolean distinct, Value v) {
        if (v == ValueNull.INSTANCE) {
            return;
        }
        if (distinct) {
            if (distinctValues == null) {
                distinctValues = ValueHashMap.newInstance();
            }
            // only the key matters; the mapped object is a placeholder
            distinctValues.put(v, this);
            return;
        }
        if (list == null) {
            list = New.arrayList();
        }
        list.add(v);
    }

    /**
     * Prepare the list of collected values. In distinct mode the list is
     * rebuilt from the remembered distinct values. This method itself does
     * not compute a result.
     *
     * @param database the database
     * @param distinct if distinct is used
     * @return always null; use getList() to read the collected values
     */
    @Override
    Value getValue(Database database, boolean distinct) {
        if (distinct) {
            groupDistinct(database);
        }
        return null;
    }

    /**
     * Get the collected values.
     *
     * @return the list, or null if no value was collected
     */
    ArrayList<Value> getList() {
        return list;
    }

    /**
     * Rebuild the list from the remembered distinct values.
     *
     * @param database the database
     */
    private void groupDistinct(Database database) {
        if (distinctValues == null) {
            return;
        }
        // rebuild from scratch, otherwise a repeated call would append
        // every distinct value once more
        list = null;
        for (Value v : distinctValues.keys()) {
            add(database, false, v);
        }
    }
}
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License, Version
* 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html). Initial Developer: H2 Group
*/
package org.h2.expression;
import java.util.Arrays;
import java.util.Comparator;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueLong;
/**
 * Data stored while calculating a HISTOGRAM aggregate.
 * For each different value, a nested object of this class holds the number
 * of occurrences; at most Constants.SELECTIVITY_DISTINCT_COUNT different
 * values are tracked.
 */
class AggregateDataHistogram extends AggregateData {

    private final int dataType;
    private long count;
    private ValueHashMap<AggregateDataHistogram> distinctValues;

    /**
     * @param dataType the datatype of the computed result
     */
    AggregateDataHistogram(int dataType) {
        this.dataType = dataType;
    }

    /**
     * Add a value to this aggregate. The occurrence counter of the value
     * is incremented. A value that was not seen before is only tracked
     * while the limit of different values is not reached; otherwise it is
     * silently dropped.
     *
     * @param database the database
     * @param distinct if the calculation should be distinct
     * @param v the value
     */
    @Override
    void add(Database database, boolean distinct, Value v) {
        if (distinctValues == null) {
            distinctValues = ValueHashMap.newInstance();
        }
        AggregateDataHistogram a = distinctValues.get(v);
        if (a == null) {
            if (distinctValues.size() < Constants.SELECTIVITY_DISTINCT_COUNT) {
                a = new AggregateDataHistogram(dataType);
                distinctValues.put(v, a);
            }
        }
        if (a != null) {
            a.count++;
        }
    }

    /**
     * Get the aggregate result: an array of (value, occurrences) pairs,
     * sorted by value using the compare mode of the database, converted to
     * the result data type. If no value was added, the array is empty.
     *
     * @param database the database
     * @param distinct if distinct is used
     * @return the value
     */
    @Override
    Value getValue(Database database, boolean distinct) {
        if (distinct) {
            // NOTE(review): replaying the keys increments every stored
            // counter once more; confirm whether distinct is ever used
            // with HISTOGRAM and what the expected result would be
            count = 0;
            groupDistinct(database);
        }
        // the map is created lazily, so it is still null if no value was
        // added; report this as an empty histogram instead of failing
        int size = distinctValues == null ? 0 : distinctValues.size();
        ValueArray[] values = new ValueArray[size];
        if (distinctValues != null) {
            int i = 0;
            for (Value dv : distinctValues.keys()) {
                AggregateDataHistogram d = distinctValues.get(dv);
                values[i] = ValueArray.get(new Value[] { dv, ValueLong.get(d.count) });
                i++;
            }
        }
        final CompareMode compareMode = database.getCompareMode();
        Arrays.sort(values, new Comparator<ValueArray>() {
            @Override
            public int compare(ValueArray v1, ValueArray v2) {
                // order the pairs by their first element, the value
                Value a1 = v1.getList()[0];
                Value a2 = v2.getList()[0];
                return a1.compareTo(a2, compareMode);
            }
        });
        Value v = ValueArray.get(values);
        return v.convertTo(dataType);
    }

    /**
     * Add each tracked value once more using the non-distinct code path.
     *
     * @param database the database
     */
    private void groupDistinct(Database database) {
        if (distinctValues == null) {
            return;
        }
        count = 0;
        for (Value v : distinctValues.keys()) {
            add(database, false, v);
        }
    }
}
/*
* Copyright 2004-2013 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.util.IntIntHashMap;
import org.h2.value.Value;
import org.h2.value.ValueInt;
/**
 * Data stored while calculating a SELECTIVITY aggregate.
 * The result is an estimate, in percent, of the number of different hash
 * codes relative to the number of added values.
 */
class AggregateDataSelectivity extends AggregateData {

    private final int dataType;
    private long count;
    private IntIntHashMap distinctHashes;
    private double m2;

    /**
     * @param dataType the datatype of the computed result
     */
    AggregateDataSelectivity(int dataType) {
        this.dataType = dataType;
    }

    /**
     * Add a value to this aggregate. The hash code of the value is
     * remembered. Once more than Constants.SELECTIVITY_DISTINCT_COUNT hash
     * codes are remembered, their number is added to the running total and
     * a new, empty map is started.
     *
     * @param database the database
     * @param distinct if the calculation should be distinct
     * @param v the value
     */
    @Override
    void add(Database database, boolean distinct, Value v) {
        count++;
        if (distinctHashes == null) {
            distinctHashes = new IntIntHashMap();
        }
        int size = distinctHashes.size();
        if (size > Constants.SELECTIVITY_DISTINCT_COUNT) {
            distinctHashes = new IntIntHashMap();
            m2 += size;
        }
        int hash = v.hashCode();
        // the value -1 is not supported
        // (only the number of keys in the map is ever read)
        distinctHashes.put(hash, 1);
    }

    /**
     * Get the aggregate result: 0 if no value was counted, otherwise the
     * estimated selectivity limited to the range 1 to 100, converted to
     * the result data type. The accumulated state is not modified by the
     * computation, so repeated calls return the same result.
     *
     * @param database the database
     * @param distinct if distinct is used; in that case the counter is
     *            reset and the result is 0
     * @return the value
     */
    @Override
    Value getValue(Database database, boolean distinct) {
        if (distinct) {
            count = 0;
        }
        int s = 0;
        if (count != 0) {
            // count > 0 means add() was called, so the map exists; use a
            // local instead of writing the percentage back into m2, which
            // would change the result of the next call
            double percent = 100 * (m2 + distinctHashes.size()) / count;
            s = (int) percent;
            s = s <= 0 ? 1 : s > 100 ? 100 : s;
        }
        Value v = ValueInt.get(s);
        return v.convertTo(dataType);
    }
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论