Unverified 提交 19767b0d authored 作者: Noel Grandin's avatar Noel Grandin 提交者: GitHub

Merge pull request #1120 from grandinj/1097_slow_querygroup

#1097 reduce memory overhead of the group by data structures
......@@ -5,6 +5,13 @@
*/
package org.h2.command.dml;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.TreeMap;
import org.h2.api.ErrorCode;
import org.h2.api.Trigger;
import org.h2.command.CommandInterface;
......@@ -12,23 +19,40 @@ import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.engine.Session;
import org.h2.engine.SysProperties;
import org.h2.expression.*;
import org.h2.expression.Alias;
import org.h2.expression.Comparison;
import org.h2.expression.ConditionAndOr;
import org.h2.expression.Expression;
import org.h2.expression.ExpressionColumn;
import org.h2.expression.ExpressionVisitor;
import org.h2.expression.Parameter;
import org.h2.index.Cursor;
import org.h2.index.Index;
import org.h2.index.IndexType;
import org.h2.message.DbException;
import org.h2.result.*;
import org.h2.table.*;
import org.h2.util.*;
import org.h2.result.LazyResult;
import org.h2.result.LocalResult;
import org.h2.result.ResultInterface;
import org.h2.result.ResultTarget;
import org.h2.result.Row;
import org.h2.result.SearchRow;
import org.h2.result.SortOrder;
import org.h2.table.Column;
import org.h2.table.ColumnResolver;
import org.h2.table.IndexColumn;
import org.h2.table.JoinBatch;
import org.h2.table.Table;
import org.h2.table.TableFilter;
import org.h2.table.TableView;
import org.h2.util.ColumnNamer;
import org.h2.util.StatementBuilder;
import org.h2.util.StringUtils;
import org.h2.util.Utils;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueArray;
import org.h2.value.ValueNull;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
/**
* This class represents a simple SELECT statement.
*
......@@ -81,10 +105,22 @@ public class Select extends Query {
boolean[] groupByExpression;
/**
* The current group-by values.
* The array of current group-by expression data e.g. AggregateData.
*/
HashMap<Expression, Object> currentGroup;
Object[] currentGroupByExprData;
/**
* Maps an expression object to its index in the Object[] arrays that hold the per-group
* expression data (the values of groupByData, and currentGroupByExprData).
*/
private final HashMap<Expression,Integer> exprToIndexInGroupByData = new HashMap<>();
/**
* Map of group-by key to group-by expression data e.g. AggregateData
*/
private HashMap<Value, Object[]> groupByData;
/**
* Key into groupByData that produces currentGroupByExprData. Not used in lazy mode.
*/
private ValueArray currentGroupsKey;
private int havingIndex;
private boolean isGroupQuery, isGroupSortedQuery;
private boolean isForUpdate, isForUpdateMvcc;
......@@ -151,8 +187,45 @@ public class Select extends Query {
return group;
}
public HashMap<Expression, Object> getCurrentGroup() {
return currentGroup;
/**
* Checks whether a group is currently active, i.e. whether the array of
* per-group expression data (currentGroupByExprData) is set.
*
* @return true if currentGroupByExprData is not null
*/
public boolean isCurrentGroup() {
return currentGroupByExprData != null;
}
/**
 * Get the group-by data for the current group and the passed in expression.
 *
 * @param expr the expression whose per-group data is requested
 * @return the stored data, or null if the expression has no index yet or the
 *         current group's array has no slot for that index
 */
public Object getCurrentGroupExprData(Expression expr) {
    Integer index = exprToIndexInGroupByData.get(expr);
    // Indexes are shared by all groups, but every group owns its own array, sized
    // when the group is created. An array allocated before this expression was
    // registered can be shorter than the index; that simply means "no data yet".
    if (index == null || index >= currentGroupByExprData.length) {
        return null;
    }
    return currentGroupByExprData[index];
}
/**
 * Set the group-by data for the current group and the passed in expression.
 * <p>
 * An expression seen for the first time is assigned the next free index. The
 * current group's array is grown when it has no slot for the index.
 *
 * @param expr the expression the data belongs to
 * @param obj the data to store, e.g. AggregateData
 */
public void setCurrentGroupExprData(Expression expr, Object obj) {
    Integer index = exprToIndexInGroupByData.get(expr);
    if (index == null) {
        // first time this expression is seen: assign the next free index
        index = exprToIndexInGroupByData.size();
        exprToIndexInGroupByData.put(expr, index);
    } else {
        // a slot that already exists must not be overwritten
        assert index >= currentGroupByExprData.length || currentGroupByExprData[index] == null;
    }
    ensureCurrentGroupCapacity(index);
    currentGroupByExprData[index] = obj;
}

/**
 * Grows the current group's array so that the given index is valid, and keeps
 * the entry in groupByData pointing at the (possibly new) array.
 *
 * @param index the index that is about to be written
 */
private void ensureCurrentGroupCapacity(int index) {
    int oldLength = currentGroupByExprData.length;
    if (index < oldLength) {
        return;
    }
    // Doubling alone is not enough when the array is empty or when the index is
    // more than twice the old length, so always make room for at least index + 1.
    int newLength = Math.max(index + 1, oldLength * 2);
    currentGroupByExprData = Arrays.copyOf(currentGroupByExprData, newLength);
    // this can be null in lazy mode
    if (currentGroupsKey != null) {
        // since we changed the size of the array, update the object in the groups map
        groupByData.put(currentGroupsKey, currentGroupByExprData);
    }
}
public int getCurrentGroupRowId() {
......@@ -313,72 +386,79 @@ public class Select extends Query {
}
private void queryGroup(int columnCount, LocalResult result) {
ValueHashMap<HashMap<Expression, Object>> groups =
ValueHashMap.newInstance();
int rowNumber = 0;
setCurrentRowNumber(0);
currentGroup = null;
ValueArray defaultGroup = ValueArray.get(new Value[0]);
int sampleSize = getSampleSizeValue(session);
while (topTableFilter.next()) {
setCurrentRowNumber(rowNumber + 1);
if (isConditionMet()) {
Value key;
rowNumber++;
if (groupIndex == null) {
key = defaultGroup;
} else {
Value[] keyValues = new Value[groupIndex.length];
// update group
for (int i = 0; i < groupIndex.length; i++) {
int idx = groupIndex[i];
Expression expr = expressions.get(idx);
keyValues[i] = expr.getValue(session);
groupByData = new HashMap<>();
currentGroupByExprData = null;
currentGroupsKey = null;
exprToIndexInGroupByData.clear();
try {
int rowNumber = 0;
setCurrentRowNumber(0);
ValueArray defaultGroup = ValueArray.get(new Value[0]);
int sampleSize = getSampleSizeValue(session);
while (topTableFilter.next()) {
setCurrentRowNumber(rowNumber + 1);
if (isConditionMet()) {
rowNumber++;
if (groupIndex == null) {
currentGroupsKey = defaultGroup;
} else {
Value[] keyValues = new Value[groupIndex.length];
// update group
for (int i = 0; i < groupIndex.length; i++) {
int idx = groupIndex[i];
Expression expr = expressions.get(idx);
keyValues[i] = expr.getValue(session);
}
currentGroupsKey = ValueArray.get(keyValues);
}
Object[] values = groupByData.get(currentGroupsKey);
if (values == null) {
values = new Object[Math.max(exprToIndexInGroupByData.size(), expressions.size())];
groupByData.put(currentGroupsKey, values);
}
currentGroupByExprData = values;
currentGroupRowId++;
for (int i = 0; i < columnCount; i++) {
if (groupByExpression == null || !groupByExpression[i]) {
Expression expr = expressions.get(i);
expr.updateAggregate(session);
}
}
if (sampleSize > 0 && rowNumber >= sampleSize) {
break;
}
key = ValueArray.get(keyValues);
}
HashMap<Expression, Object> values = groups.get(key);
if (values == null) {
values = new HashMap<>();
groups.put(key, values);
}
if (groupIndex == null && groupByData.size() == 0) {
groupByData.put(defaultGroup,
new Object[Math.max(exprToIndexInGroupByData.size(), expressions.size())]);
}
for (Map.Entry<Value, Object[]> entry : groupByData.entrySet()) {
currentGroupsKey = (ValueArray) entry.getKey();
currentGroupByExprData = entry.getValue();
Value[] keyValues = currentGroupsKey.getList();
Value[] row = new Value[columnCount];
for (int j = 0; groupIndex != null && j < groupIndex.length; j++) {
row[groupIndex[j]] = keyValues[j];
}
currentGroup = values;
currentGroupRowId++;
for (int i = 0; i < columnCount; i++) {
if (groupByExpression == null || !groupByExpression[i]) {
Expression expr = expressions.get(i);
expr.updateAggregate(session);
for (int j = 0; j < columnCount; j++) {
if (groupByExpression != null && groupByExpression[j]) {
continue;
}
Expression expr = expressions.get(j);
row[j] = expr.getValue(session);
}
if (sampleSize > 0 && rowNumber >= sampleSize) {
break;
}
}
}
if (groupIndex == null && groups.size() == 0) {
groups.put(defaultGroup, new HashMap<Expression, Object>());
}
ArrayList<Value> keys = groups.keys();
for (Value v : keys) {
ValueArray key = (ValueArray) v;
currentGroup = groups.get(key);
Value[] keyValues = key.getList();
Value[] row = new Value[columnCount];
for (int j = 0; groupIndex != null && j < groupIndex.length; j++) {
row[groupIndex[j]] = keyValues[j];
}
for (int j = 0; j < columnCount; j++) {
if (groupByExpression != null && groupByExpression[j]) {
if (isHavingNullOrFalse(row)) {
continue;
}
Expression expr = expressions.get(j);
row[j] = expr.getValue(session);
}
if (isHavingNullOrFalse(row)) {
continue;
row = keepOnlyDistinct(row, columnCount);
result.addRow(row);
}
row = keepOnlyDistinct(row, columnCount);
result.addRow(row);
} finally {
groupByData = null;
currentGroupsKey = null;
currentGroupByExprData = null;
exprToIndexInGroupByData.clear();
}
}
......@@ -1475,13 +1555,15 @@ public class Select extends Query {
LazyResultGroupSorted(Expression[] expressions, int columnCount) {
super(expressions, columnCount);
currentGroup = null;
currentGroupByExprData = null;
currentGroupsKey = null;
}
@Override
public void reset() {
super.reset();
currentGroup = null;
currentGroupByExprData = null;
currentGroupsKey = null;
}
@Override
......@@ -1501,11 +1583,11 @@ public class Select extends Query {
Value[] row = null;
if (previousKeyValues == null) {
previousKeyValues = keyValues;
currentGroup = new HashMap<>();
currentGroupByExprData = new Object[Math.max(exprToIndexInGroupByData.size(), expressions.size())];
} else if (!Arrays.equals(previousKeyValues, keyValues)) {
row = createGroupSortedRow(previousKeyValues, columnCount);
previousKeyValues = keyValues;
currentGroup = new HashMap<>();
currentGroupByExprData = new Object[Math.max(exprToIndexInGroupByData.size(), expressions.size())];
}
currentGroupRowId++;
......
......@@ -283,8 +283,7 @@ public class Aggregate extends Expression {
// if (on != null) {
// on.updateAggregate();
// }
HashMap<Expression, Object> group = select.getCurrentGroup();
if (group == null) {
if (!select.isCurrentGroup()) {
// this is a different level (the enclosing query)
return;
}
......@@ -296,10 +295,10 @@ public class Aggregate extends Expression {
}
lastGroupRowId = groupRowId;
AggregateData data = (AggregateData) group.get(this);
AggregateData data = (AggregateData) select.getCurrentGroupExprData(this);
if (data == null) {
data = AggregateData.create(type);
group.put(this, data);
select.setCurrentGroupExprData(this, data);
}
Value v = on == null ? null : on.getValue(session);
if (type == AggregateType.GROUP_CONCAT) {
......@@ -372,13 +371,13 @@ public class Aggregate extends Expression {
DbException.throwInternalError("type=" + type);
}
}
HashMap<Expression, Object> group = select.getCurrentGroup();
if (group == null) {
throw DbException.get(ErrorCode.INVALID_USE_OF_AGGREGATE_FUNCTION_1, getSQL());
if (!select.isCurrentGroup()) {
throw DbException.get(ErrorCode.INVALID_USE_OF_AGGREGATE_FUNCTION_1, getSQL());
}
AggregateData data = (AggregateData) group.get(this);
AggregateData data = (AggregateData)select.getCurrentGroupExprData(this);
if (data == null) {
data = AggregateData.create(type);
select.setCurrentGroupExprData(this, data);
}
if (type == AggregateType.GROUP_CONCAT) {
Value[] array = ((AggregateDataCollecting) data).getArray();
......
......@@ -7,6 +7,7 @@ package org.h2.expression;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.util.ValueHashMap;
......@@ -47,9 +48,9 @@ class AggregateDataHistogram extends AggregateData {
}
ValueArray[] values = new ValueArray[distinctValues.size()];
int i = 0;
for (Value dv : distinctValues.keys()) {
AggregateDataHistogram d = distinctValues.get(dv);
values[i] = ValueArray.get(new Value[] { dv, ValueLong.get(d.count) });
for (Map.Entry<Value,AggregateDataHistogram> entry : distinctValues.entries()) {
AggregateDataHistogram d = entry.getValue();
values[i] = ValueArray.get(new Value[] { entry.getKey(), ValueLong.get(d.count) });
i++;
}
final CompareMode compareMode = database.getCompareMode();
......
......@@ -159,14 +159,13 @@ public class ExpressionColumn extends Expression {
if (select == null) {
throw DbException.get(ErrorCode.MUST_GROUP_BY_COLUMN_1, getSQL());
}
HashMap<Expression, Object> values = select.getCurrentGroup();
if (values == null) {
if (!select.isCurrentGroup()) {
// this is a different level (the enclosing query)
return;
}
Value v = (Value) values.get(this);
Value v = (Value) select.getCurrentGroupExprData(this);
if (v == null) {
values.put(this, now);
select.setCurrentGroupExprData(this, now);
} else {
if (!database.areEqual(now, v)) {
throw DbException.get(ErrorCode.MUST_GROUP_BY_COLUMN_1, getSQL());
......@@ -178,9 +177,8 @@ public class ExpressionColumn extends Expression {
public Value getValue(Session session) {
Select select = columnResolver.getSelect();
if (select != null) {
HashMap<Expression, Object> values = select.getCurrentGroup();
if (values != null) {
Value v = (Value) values.get(this);
if (select.isCurrentGroup()) {
Value v = (Value) select.getCurrentGroupExprData(this);
if (v != null) {
return v;
}
......
......@@ -167,15 +167,14 @@ public class JavaAggregate extends Expression {
@Override
public Value getValue(Session session) {
HashMap<Expression, Object> group = select.getCurrentGroup();
if (group == null) {
if (!select.isCurrentGroup()) {
throw DbException.get(ErrorCode.INVALID_USE_OF_AGGREGATE_FUNCTION_1, getSQL());
}
try {
Aggregate agg;
if (distinct) {
agg = getInstance();
AggregateDataCollecting data = (AggregateDataCollecting) group.get(this);
AggregateDataCollecting data = (AggregateDataCollecting) select.getCurrentGroupExprData(this);
if (data != null) {
for (Value value : data.values) {
if (args.length == 1) {
......@@ -191,7 +190,7 @@ public class JavaAggregate extends Expression {
}
}
} else {
agg = (Aggregate) group.get(this);
agg = (Aggregate) select.getCurrentGroupExprData(this);
if (agg == null) {
agg = getInstance();
}
......@@ -208,8 +207,7 @@ public class JavaAggregate extends Expression {
@Override
public void updateAggregate(Session session) {
HashMap<Expression, Object> group = select.getCurrentGroup();
if (group == null) {
if (!select.isCurrentGroup()) {
// this is a different level (the enclosing query)
return;
}
......@@ -229,10 +227,10 @@ public class JavaAggregate extends Expression {
try {
if (distinct) {
AggregateDataCollecting data = (AggregateDataCollecting) group.get(this);
AggregateDataCollecting data = (AggregateDataCollecting) select.getCurrentGroupExprData(this);
if (data == null) {
data = new AggregateDataCollecting();
group.put(this, data);
select.setCurrentGroupExprData(this, data);
}
Value[] argValues = new Value[args.length];
Value arg = null;
......@@ -243,10 +241,10 @@ public class JavaAggregate extends Expression {
}
data.add(session.getDatabase(), dataType, true, args.length == 1 ? arg : ValueArray.get(argValues));
} else {
Aggregate agg = (Aggregate) group.get(this);
Aggregate agg = (Aggregate) select.getCurrentGroupExprData(this);
if (agg == null) {
agg = getInstance();
group.put(this, agg);
select.setCurrentGroupExprData(this, agg);
}
Object[] argValues = new Object[args.length];
Object arg = null;
......
......@@ -5,20 +5,38 @@
*/
package org.h2.util;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import org.h2.message.DbException;
import org.h2.value.Value;
import org.h2.value.ValueNull;
/**
* This hash map supports keys of type Value.
* <p>
* ValueHashMap is a very simple implementation without allocation of additional
* objects for entries. It's very fast with good distribution of hashes, but if
* hashes have a lot of collisions this implementation tends to be very slow.
* <p>
* HashMap in archaic versions of Java has some overhead for allocation of
* entries, but slightly better behaviour with a limited number of collisions,
* because collisions have no impact on non-colliding entries. HashMap in modern
* versions of Java also has the same overhead, but it builds trees of keys
* with colliding hashes, which is why even if all the keys have exactly the same
* hash code it still offers good performance similar to TreeMap. So
* ValueHashMap is faster in typical cases, but may behave really badly in some
* cases. HashMap is slower in typical cases, but its performance does not
* degrade too much even in the worst possible case (if keys are comparable).
*
* @param <V> the value type
*/
public class ValueHashMap<V> extends HashBase {
private Value[] keys;
private V[] values;
Value[] keys;
V[] values;
/**
* Create a new value hash map.
......@@ -174,6 +192,51 @@ public class ValueHashMap<V> extends HashBase {
}
return list;
}
/**
* Get an iterable view over the entries of this map.
* Each call to iterator() on the result creates a new EntryIterator.
*
* @return an iterable over the key/value entries
*/
public Iterable<Map.Entry<Value, V>> entries() {
return new EntryIterable();
}
/**
* Iterable over the entries of the enclosing map; hands out a fresh
* EntryIterator on every call to iterator().
*/
private final class EntryIterable implements Iterable<Map.Entry<Value, V>> {
EntryIterable() {
}
/**
* Create a new iterator over the entries.
*
* @return a new EntryIterator
*/
@Override
public Iterator<Map.Entry<Value, V>> iterator() {
return new EntryIterator();
}
}
/**
 * Iterator over the entries of the enclosing map. It walks the keys array
 * forward, skipping empty (null) and deleted (ValueNull.DELETED) slots, and
 * stops after as many entries as the map held when the iterator was created.
 * Removal is not supported.
 */
private final class EntryIterator implements Iterator<Map.Entry<Value, V>> {

    /** Slot of the entry returned last, or -1 before the first call to next(). */
    private int position = -1;

    /** Number of entries still to be returned. */
    private int remaining = size;

    EntryIterator() {
    }

    @Override
    public boolean hasNext() {
        return remaining > 0;
    }

    @Override
    public Map.Entry<Value, V> next() {
        if (remaining <= 0) {
            throw new NoSuchElementException();
        }
        remaining--;
        Value key;
        // advance to the next slot that holds a live key
        do {
            position++;
            key = keys[position];
        } while (key == null || key == ValueNull.DELETED);
        return new AbstractMap.SimpleImmutableEntry<Value, V>(key, values[position]);
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
/**
* Get the list of values.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论