Unverified commit fdaab385 authored by Evgenij Ryazanov, committed by GitHub

Merge pull request #1515 from katzyn/aggregate

Use the same aggregate data for HISTOGRAM and MODE
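For context, a minimal SQL sketch of the two aggregates this merge unifies. The HISTOGRAM shape is taken from the histogram.sql script added further down; the MODE query and result are an illustration only (data chosen so the most frequent value is unambiguous), not part of the patch.

-- HISTOGRAM returns an array of (value, count) pairs sorted by value.
SELECT HISTOGRAM(X) FROM VALUES (1), (2), (3), (1), (2), (NULL), (5) T(X);
-- ((null, 1), (1, 2), (2, 2), (3, 1), (5, 1))

-- MODE returns the most frequently occurring value (2 occurs twice here).
SELECT MODE(X) FROM VALUES (1), (2), (2), (3) T(X);
-- 2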
@@ -9,10 +9,12 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.Map.Entry;
import org.h2.api.ErrorCode;
import org.h2.command.dml.Select;
import org.h2.command.dml.SelectOrderBy;
import org.h2.engine.Database;
+import org.h2.engine.Mode;
import org.h2.engine.Session;
import org.h2.expression.Expression;
import org.h2.expression.ExpressionColumn;
@@ -29,6 +31,8 @@ import org.h2.table.Table;
import org.h2.table.TableFilter;
import org.h2.util.StatementBuilder;
import org.h2.util.StringUtils;
+import org.h2.util.ValueHashMap;
+import org.h2.value.CompareMode;
import org.h2.value.DataType;
import org.h2.value.Value;
import org.h2.value.ValueArray;
@@ -445,33 +449,10 @@ public class Aggregate extends AbstractAggregate {
            return d.getValue(db, dataType);
        }
        break;
-   case GROUP_CONCAT: {
-       Value[] array = ((AggregateDataCollecting) data).getArray();
-       if (array == null) {
-           return ValueNull.INSTANCE;
-       }
-       if (orderByList != null || distinct) {
-           sortWithOrderBy(array);
-       }
-       StatementBuilder buff = new StatementBuilder();
-       String sep = groupConcatSeparator == null ? "," : groupConcatSeparator.getValue(session).getString();
-       for (Value val : array) {
-           String s;
-           if (val.getType() == Value.ARRAY) {
-               s = ((ValueArray) val).getList()[0].getString();
-           } else {
-               s = val.getString();
-           }
-           if (s == null) {
-               continue;
-           }
-           if (sep != null) {
-               buff.appendExceptFirst(sep);
-           }
-           buff.append(s);
-       }
-       return ValueString.get(buff.toString());
-   }
+   case HISTOGRAM:
+       return getHistogram(session, data);
+   case GROUP_CONCAT:
+       return getGroupConcat(session, data);
    case ARRAY_AGG: {
        Value[] array = ((AggregateDataCollecting) data).getArray();
        if (array == null) {
@@ -495,17 +476,106 @@ public class Aggregate extends AbstractAggregate {
            return AggregateMedian.median(session.getDatabase(), array, dataType);
        }
    case MODE:
-       if (orderByList != null) {
-           return ((AggregateDataMode) data).getOrderedValue(session.getDatabase(), dataType,
-                   (orderByList.get(0).sortType & SortOrder.DESCENDING) != 0);
-       }
-       //$FALL-THROUGH$
+       return getMode(session, data);
    default:
        // Avoid compiler warning
    }
    return data.getValue(session.getDatabase(), dataType);
}
private Value getGroupConcat(Session session, AggregateData data) {
Value[] array = ((AggregateDataCollecting) data).getArray();
if (array == null) {
return ValueNull.INSTANCE;
}
if (orderByList != null || distinct) {
sortWithOrderBy(array);
}
StatementBuilder buff = new StatementBuilder();
String sep = groupConcatSeparator == null ? "," : groupConcatSeparator.getValue(session).getString();
for (Value val : array) {
String s;
if (val.getType() == Value.ARRAY) {
s = ((ValueArray) val).getList()[0].getString();
} else {
s = val.getString();
}
if (s == null) {
continue;
}
if (sep != null) {
buff.appendExceptFirst(sep);
}
buff.append(s);
}
return ValueString.get(buff.toString());
}
private Value getHistogram(Session session, AggregateData data) {
ValueHashMap<LongDataCounter> distinctValues = ((AggregateDataDistinctWithCounts) data).getValues();
if (distinctValues == null) {
return ValueArray.getEmpty();
}
ValueArray[] values = new ValueArray[distinctValues.size()];
int i = 0;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
LongDataCounter d = entry.getValue();
values[i] = ValueArray.get(new Value[] { entry.getKey(), ValueLong.get(distinct ? 1L : d.count) });
i++;
}
Database db = session.getDatabase();
final Mode mode = db.getMode();
final CompareMode compareMode = db.getCompareMode();
Arrays.sort(values, new Comparator<ValueArray>() {
@Override
public int compare(ValueArray v1, ValueArray v2) {
Value a1 = v1.getList()[0];
Value a2 = v2.getList()[0];
return a1.compareTo(a2, mode, compareMode);
}
});
return ValueArray.get(values);
}
private Value getMode(Session session, AggregateData data) {
Value v = ValueNull.INSTANCE;
ValueHashMap<LongDataCounter> distinctValues = ((AggregateDataDistinctWithCounts) data).getValues();
if (distinctValues == null) {
return v;
}
long count = 0L;
if (orderByList != null) {
boolean desc = (orderByList.get(0).sortType & SortOrder.DESCENDING) != 0;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
} else if (c == count) {
Value v2 = entry.getKey();
int cmp = session.getDatabase().compareTypeSafe(v, v2);
if (desc) {
if (cmp >= 0) {
continue;
}
} else if (cmp <= 0) {
continue;
}
v = v2;
}
}
} else {
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
}
}
}
return v.convertTo(dataType);
}
    @Override
    public int getType() {
        return dataType;
...
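The tie-breaking branch in getMode above is easy to misread: when two values share the same count, an ascending ORDER BY keeps the smaller of the tied values and DESC keeps the larger. A hedged sketch, assuming the MODE(expr ORDER BY expr) form that the orderByList handling implies; the syntax and results below are illustrative, not taken from the patch.

-- Assumed syntax: 1 and 2 both occur twice; the ORDER BY direction decides the tie.
SELECT MODE(X ORDER BY X) FROM VALUES (1), (1), (2), (2), (3) T(X);
-- 1 (ascending: the smallest tied value wins)
SELECT MODE(X ORDER BY X DESC) FROM VALUES (1), (1), (2), (2), (3) T(X);
-- 2 (descending: the largest tied value wins)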
@@ -5,6 +5,7 @@
 */
package org.h2.expression.aggregate;

+import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.expression.aggregate.Aggregate.AggregateType;
import org.h2.message.DbException;
@@ -55,9 +56,9 @@ abstract class AggregateData {
    case SELECTIVITY:
        return new AggregateDataSelectivity(distinct);
    case HISTOGRAM:
-       return new AggregateDataHistogram(distinct);
+       return new AggregateDataDistinctWithCounts(false, Constants.SELECTIVITY_DISTINCT_COUNT);
    case MODE:
-       return new AggregateDataMode();
+       return new AggregateDataDistinctWithCounts(true, Integer.MAX_VALUE);
    case ENVELOPE:
        return new AggregateDataEnvelope();
    default:
...
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression.aggregate;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueNull;
/**
* Data stored while calculating an aggregate that needs distinct values with
* their counts.
*/
class AggregateDataDistinctWithCounts extends AggregateData {
private final boolean ignoreNulls;
private final int maxDistinctCount;
private ValueHashMap<LongDataCounter> values;
/**
* Creates new instance of data for aggregate that needs distinct values
* with their counts.
*
* @param ignoreNulls
* whether NULL values should be ignored
* @param maxDistinctCount
* maximum count of distinct values to collect
*/
AggregateDataDistinctWithCounts(boolean ignoreNulls, int maxDistinctCount) {
this.ignoreNulls = ignoreNulls;
this.maxDistinctCount = maxDistinctCount;
}
@Override
void add(Database database, int dataType, Value v) {
if (ignoreNulls && v == ValueNull.INSTANCE) {
return;
}
if (values == null) {
values = new ValueHashMap<>();
}
LongDataCounter a = values.get(v);
if (a == null) {
if (values.size() >= maxDistinctCount) {
return;
}
a = new LongDataCounter();
values.put(v, a);
}
a.count++;
}
@Override
Value getValue(Database database, int dataType) {
return null;
}
/**
* Returns map with values and their counts.
*
* @return map with values and their counts
*/
ValueHashMap<LongDataCounter> getValues() {
return values;
}
}
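The two call sites in AggregateData.create configure this class differently: HISTOGRAM passes ignoreNulls = false and caps the map at Constants.SELECTIVITY_DISTINCT_COUNT, while MODE passes ignoreNulls = true with no practical cap. A minimal sketch of the visible difference; the HISTOGRAM expectation follows the histogram.sql script added below, the MODE line is an illustration only.

-- NULL is counted by HISTOGRAM (ignoreNulls = false) ...
SELECT HISTOGRAM(X) FROM VALUES (1), (1), (NULL) T(X);
-- ((null, 1), (1, 2))
-- ... but skipped by MODE (ignoreNulls = true).
SELECT MODE(X) FROM VALUES (1), (1), (NULL) T(X);
-- 1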
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression.aggregate;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map.Entry;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.engine.Mode;
import org.h2.util.ValueHashMap;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueLong;
/**
* Data stored while calculating a HISTOGRAM aggregate.
*/
class AggregateDataHistogram extends AggregateData {
private final boolean distinct;
private ValueHashMap<LongDataCounter> distinctValues;
/**
* Creates new instance of data for HISTOGRAM aggregate.
*
* @param distinct if distinct is used
*/
AggregateDataHistogram(boolean distinct) {
this.distinct = distinct;
}
@Override
void add(Database database, int dataType, Value v) {
if (distinctValues == null) {
distinctValues = new ValueHashMap<>();
}
LongDataCounter a = distinctValues.get(v);
if (a == null) {
if (distinctValues.size() >= Constants.SELECTIVITY_DISTINCT_COUNT) {
return;
}
a = new LongDataCounter();
distinctValues.put(v, a);
}
a.count++;
}
@Override
Value getValue(Database database, int dataType) {
if (distinctValues == null) {
return ValueArray.getEmpty().convertTo(dataType);
}
ValueArray[] values = new ValueArray[distinctValues.size()];
int i = 0;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
LongDataCounter d = entry.getValue();
values[i] = ValueArray.get(new Value[] { entry.getKey(), ValueLong.get(distinct ? 1L : d.count) });
i++;
}
final Mode mode = database.getMode();
final CompareMode compareMode = database.getCompareMode();
Arrays.sort(values, new Comparator<ValueArray>() {
@Override
public int compare(ValueArray v1, ValueArray v2) {
Value a1 = v1.getList()[0];
Value a2 = v2.getList()[0];
return a1.compareTo(a2, mode, compareMode);
}
});
return ValueArray.get(values).convertTo(dataType);
}
}
/*
* Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.expression.aggregate;
import java.util.Map.Entry;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
import org.h2.value.Value;
import org.h2.value.ValueNull;
/**
* Data stored while calculating a MODE aggregate.
*/
class AggregateDataMode extends AggregateData {
private ValueHashMap<LongDataCounter> distinctValues;
@Override
void add(Database database, int dataType, Value v) {
if (v == ValueNull.INSTANCE) {
return;
}
if (distinctValues == null) {
distinctValues = new ValueHashMap<>();
}
LongDataCounter a = distinctValues.get(v);
if (a == null) {
a = new LongDataCounter();
distinctValues.put(v, a);
}
a.count++;
}
@Override
Value getValue(Database database, int dataType) {
Value v = ValueNull.INSTANCE;
if (distinctValues != null) {
long count = 0L;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
}
}
}
return v.convertTo(dataType);
}
Value getOrderedValue(Database database, int dataType, boolean desc) {
Value v = ValueNull.INSTANCE;
if (distinctValues != null) {
long count = 0L;
for (Entry<Value, LongDataCounter> entry : distinctValues.entries()) {
long c = entry.getValue().count;
if (c > count) {
v = entry.getKey();
count = c;
} else if (c == count) {
Value v2 = entry.getKey();
int cmp = database.compareTypeSafe(v, v2);
if (desc) {
if (cmp >= 0) {
continue;
}
} else if (cmp <= 0) {
continue;
}
v = v2;
}
}
}
return v.convertTo(dataType);
}
}
@@ -162,7 +162,7 @@ public class TestScript extends TestDb {
            testScript("other/" + s + ".sql");
        }
        for (String s : new String[] { "array-agg", "avg", "bit-and", "bit-or", "count", "envelope",
-               "group-concat", "max", "median", "min", "mode", "selectivity", "stddev-pop",
+               "group-concat", "histogram", "max", "median", "min", "mode", "selectivity", "stddev-pop",
                "stddev-samp", "sum", "var-pop", "var-samp" }) {
            testScript("functions/aggregate/" + s + ".sql");
        }
...
-- Copyright 2004-2018 H2 Group. Multiple-Licensed under the MPL 2.0,
-- and the EPL 1.0 (http://h2database.com/html/license.html).
-- Initial Developer: H2 Group
--
SELECT HISTOGRAM(X), HISTOGRAM(DISTINCT X) FROM VALUES (1), (2), (3), (1), (2), (NULL), (5) T(X);
> HISTOGRAM(C1) HISTOGRAM(DISTINCT C1)
> ------------------------------------------- -------------------------------------------
> ((null, 1), (1, 2), (2, 2), (3, 1), (5, 1)) ((null, 1), (1, 1), (2, 1), (3, 1), (5, 1))
> rows: 1
SELECT HISTOGRAM(X) FILTER (WHERE X > 1), HISTOGRAM(DISTINCT X) FILTER (WHERE X > 1)
FROM VALUES (1), (2), (3), (1), (2), (NULL), (5) T(X);
> HISTOGRAM(C1) FILTER (WHERE (C1 > 1)) HISTOGRAM(DISTINCT C1) FILTER (WHERE (C1 > 1))
> ------------------------------------- ----------------------------------------------
> ((2, 2), (3, 1), (5, 1)) ((2, 1), (3, 1), (5, 1))
> rows: 1
SELECT HISTOGRAM(X) FILTER (WHERE X > 0), HISTOGRAM(DISTINCT X) FILTER (WHERE X > 0) FROM VALUES (0) T(X);
> HISTOGRAM(C1) FILTER (WHERE (C1 > 0)) HISTOGRAM(DISTINCT C1) FILTER (WHERE (C1 > 0))
> ------------------------------------- ----------------------------------------------
> () ()
> rows: 1