Unverified 提交 15866142 authored 作者: Evgenij Ryazanov's avatar Evgenij Ryazanov 提交者: GitHub

Merge pull request #1301 from katzyn/distinct

Add initial DISTINCT ON (expression, ...) implementation
......@@ -3,7 +3,8 @@
# Initial Developer: H2 Group
"SECTION","TOPIC","SYNTAX","TEXT","EXAMPLE"
"Commands (DML)","SELECT","
SELECT [ TOP term ] [ DISTINCT | ALL ] selectExpression [,...]
SELECT [ TOP term ] [ DISTINCT [ ON ( expression [,...] ) ] | ALL ]
selectExpression [,...]
FROM tableExpression [,...] [ WHERE expression ]
[ GROUP BY expression [,...] ] [ HAVING expression ]
[ { UNION [ ALL ] | MINUS | EXCEPT | INTERSECT } select ]
......@@ -42,6 +43,8 @@ SELECT * FROM TEST LIMIT 1000;
SELECT * FROM (SELECT ID, COUNT(*) FROM TEST
GROUP BY ID UNION SELECT NULL, COUNT(*) FROM TEST)
ORDER BY 1 NULLS LAST;
SELECT DISTINCT C1, C2 FROM TEST;
SELECT DISTINCT ON(C1) C1, C2 FROM TEST ORDER BY C1;
"
"Commands (DML)","INSERT","
......
......@@ -2535,7 +2535,16 @@ public class Parser {
}
currentSelect = temp;
if (readIf(DISTINCT)) {
command.setDistinct();
if (readIf(ON)) {
read(OPEN_PAREN);
ArrayList<Expression> distinctExpressions = Utils.newSmallArrayList();
do {
distinctExpressions.add(readExpression());
} while (readIfMore(true));
command.setDistinct(distinctExpressions.toArray(new Expression[0]));
} else {
command.setDistinct();
}
} else {
readIf(ALL);
}
......
......@@ -7,6 +7,7 @@ package org.h2.command.dml;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.h2.api.ErrorCode;
import org.h2.command.Prepared;
......@@ -266,7 +267,18 @@ public abstract class Query extends Prepared {
*/
public abstract void setDistinctIfPossible();
public boolean isDistinct() {
/**
 * Returns the value of the {@code distinct} flag of this query.
 *
 * NOTE(review): the method name is spelled "Stardard" (sic); it is kept
 * as-is here because renaming would require updating callers.
 *
 * @return whether this query is a plain {@code DISTINCT} query
 */
public boolean isStardardDistinct() {
return distinct;
}
/**
 * Returns whether this query removes duplicates in any form. This base
 * implementation only consults the {@code distinct} flag; {@code Select}
 * overrides it to also take {@code DISTINCT ON (...)} expressions into
 * account.
 *
 * @return whether this query is a {@code DISTINCT} or
 * {@code DISTINCT ON (...)} query
 */
public boolean isAnyDistinct() {
return distinct;
}
......@@ -405,102 +417,109 @@ public abstract class Query extends Prepared {
static void initOrder(Session session,
ArrayList<Expression> expressions,
ArrayList<String> expressionSQL,
ArrayList<SelectOrderBy> orderList,
List<SelectOrderBy> orderList,
int visible,
boolean mustBeInResult,
ArrayList<TableFilter> filters) {
Database db = session.getDatabase();
for (SelectOrderBy o : orderList) {
Expression e = o.expression;
if (e == null) {
continue;
}
// special case: SELECT 1 AS A FROM DUAL ORDER BY A
// (oracle supports it, but only in order by, not in group by and
// not in having):
// SELECT 1 AS A FROM DUAL ORDER BY -A
boolean isAlias = false;
int idx = expressions.size();
if (e instanceof ExpressionColumn) {
// order by expression
ExpressionColumn exprCol = (ExpressionColumn) e;
String tableAlias = exprCol.getOriginalTableAliasName();
String col = exprCol.getOriginalColumnName();
for (int j = 0; j < visible; j++) {
boolean found = false;
Expression ec = expressions.get(j);
if (ec instanceof ExpressionColumn) {
// select expression
ExpressionColumn c = (ExpressionColumn) ec;
found = db.equalsIdentifiers(col, c.getColumnName());
if (found && tableAlias != null) {
String ca = c.getOriginalTableAliasName();
if (ca == null) {
found = false;
if (filters != null) {
// select id from test order by test.id
for (TableFilter f : filters) {
if (db.equalsIdentifiers(f.getTableAlias(), tableAlias)) {
found = true;
break;
}
int idx = initExpression(session, expressions, expressionSQL, e, visible, mustBeInResult, filters);
o.columnIndexExpr = ValueExpression.get(ValueInt.get(idx + 1));
o.expression = expressions.get(idx).getNonAliasExpression();
}
}
static int initExpression(Session session, ArrayList<Expression> expressions,
ArrayList<String> expressionSQL, Expression e, int visible, boolean mustBeInResult,
ArrayList<TableFilter> filters) {
Database db = session.getDatabase();
// special case: SELECT 1 AS A FROM DUAL ORDER BY A
// (oracle supports it, but only in order by, not in group by and
// not in having):
// SELECT 1 AS A FROM DUAL ORDER BY -A
boolean isAlias = false;
int idx = expressions.size();
if (e instanceof ExpressionColumn) {
// order by expression
ExpressionColumn exprCol = (ExpressionColumn) e;
String tableAlias = exprCol.getOriginalTableAliasName();
String col = exprCol.getOriginalColumnName();
for (int j = 0; j < visible; j++) {
boolean found = false;
Expression ec = expressions.get(j);
if (ec instanceof ExpressionColumn) {
// select expression
ExpressionColumn c = (ExpressionColumn) ec;
found = db.equalsIdentifiers(col, c.getColumnName());
if (found && tableAlias != null) {
String ca = c.getOriginalTableAliasName();
if (ca == null) {
found = false;
if (filters != null) {
// select id from test order by test.id
for (TableFilter f : filters) {
if (db.equalsIdentifiers(f.getTableAlias(), tableAlias)) {
found = true;
break;
}
}
} else {
found = db.equalsIdentifiers(ca, tableAlias);
}
} else {
found = db.equalsIdentifiers(ca, tableAlias);
}
} else if (!(ec instanceof Alias)) {
continue;
} else if (tableAlias == null && db.equalsIdentifiers(col, ec.getAlias())) {
found = true;
} else {
Expression ec2 = ec.getNonAliasExpression();
if (ec2 instanceof ExpressionColumn) {
ExpressionColumn c2 = (ExpressionColumn) ec2;
String ta = exprCol.getSQL();
String tb = c2.getSQL();
String s2 = c2.getColumnName();
found = db.equalsIdentifiers(col, s2);
if (!db.equalsIdentifiers(ta, tb)) {
found = false;
}
}
} else if (!(ec instanceof Alias)) {
continue;
} else if (tableAlias == null && db.equalsIdentifiers(col, ec.getAlias())) {
found = true;
} else {
Expression ec2 = ec.getNonAliasExpression();
if (ec2 instanceof ExpressionColumn) {
ExpressionColumn c2 = (ExpressionColumn) ec2;
String ta = exprCol.getSQL();
String tb = c2.getSQL();
String s2 = c2.getColumnName();
found = db.equalsIdentifiers(col, s2);
if (!db.equalsIdentifiers(ta, tb)) {
found = false;
}
}
if (found) {
}
if (found) {
idx = j;
isAlias = true;
break;
}
}
} else {
String s = e.getSQL();
if (expressionSQL != null) {
for (int j = 0, size = expressionSQL.size(); j < size; j++) {
String s2 = expressionSQL.get(j);
if (db.equalsIdentifiers(s2, s)) {
idx = j;
isAlias = true;
break;
}
}
} else {
String s = e.getSQL();
if (expressionSQL != null) {
for (int j = 0, size = expressionSQL.size(); j < size; j++) {
String s2 = expressionSQL.get(j);
if (db.equalsIdentifiers(s2, s)) {
idx = j;
isAlias = true;
break;
}
}
}
}
if (!isAlias) {
if (mustBeInResult) {
if (session.getDatabase().getMode().getEnum() != ModeEnum.MySQL) {
if (!checkOrderOther(session, e, expressionSQL)) {
throw DbException.get(ErrorCode.ORDER_BY_NOT_IN_RESULT, e.getSQL());
}
}
if (!isAlias) {
if (mustBeInResult) {
if (session.getDatabase().getMode().getEnum() != ModeEnum.MySQL) {
if (!checkOrderOther(session, e, expressionSQL)) {
throw DbException.get(ErrorCode.ORDER_BY_NOT_IN_RESULT, e.getSQL());
}
}
expressions.add(e);
String sql = e.getSQL();
expressionSQL.add(sql);
}
o.columnIndexExpr = ValueExpression.get(ValueInt.get(idx + 1));
o.expression = expressions.get(idx).getNonAliasExpression();
expressions.add(e);
String sql = e.getSQL();
expressionSQL.add(sql);
}
return idx;
}
/**
......
......@@ -7,6 +7,7 @@ package org.h2.command.dml;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
......@@ -89,6 +90,13 @@ public class Select extends Query {
*/
int visibleColumnCount;
/**
* {@code DISTINCT ON(...)} expressions.
*/
private Expression[] distinctExpressions;
private int[] distinctIndexes;
private int distinctColumnCount;
private ArrayList<SelectOrderBy> orderList;
private ArrayList<Expression> group;
......@@ -247,13 +255,36 @@ public class Select extends Query {
return orderList != null || sort != null;
}
@Override
public void setDistinct() {
    // Plain DISTINCT cannot be combined with DISTINCT ON (...): reject the
    // request if ON expressions were already registered for this query.
    final boolean hasDistinctOn = this.distinctExpressions != null;
    if (hasDistinctOn) {
        throw DbException.getUnsupportedException("DISTINCT ON together with DISTINCT");
    }
    this.distinct = true;
}
/**
 * Turn this query into a {@code DISTINCT ON (...)} query by storing the
 * expressions of its ON clause. Fails with an unsupported-feature
 * exception if plain {@code DISTINCT} was already set on this query.
 *
 * @param distinctExpressions the expressions listed in the ON clause
 */
public void setDistinct(Expression[] distinctExpressions) {
    final boolean plainDistinct = this.distinct;
    if (plainDistinct) {
        // the two forms are mutually exclusive
        throw DbException.getUnsupportedException("DISTINCT ON together with DISTINCT");
    }
    this.distinctExpressions = distinctExpressions;
}
@Override
public void setDistinctIfPossible() {
if (!distinct && offsetExpr == null && limitExpr == null) {
setDistinct();
if (!isAnyDistinct() && offsetExpr == null && limitExpr == null) {
distinct = true;
}
}
@Override
public boolean isAnyDistinct() {
    // plain DISTINCT
    if (distinct) {
        return true;
    }
    // DISTINCT ON (...)
    return distinctExpressions != null;
}
/**
* Add a condition to the list of conditions.
*
......@@ -666,13 +697,18 @@ public class Select extends Query {
!session.getDatabase().getSettings().optimizeInsertFromSelect)) {
result = createLocalResult(result);
}
if (sort != null && (!sortUsingIndex || distinct)) {
if (sort != null && (!sortUsingIndex || isAnyDistinct())) {
result = createLocalResult(result);
result.setSortOrder(sort);
}
if (distinct && !isDistinctQuery) {
if (distinct) {
if (!isDistinctQuery) {
result = createLocalResult(result);
result.setDistinct();
}
} else if (distinctExpressions != null) {
result = createLocalResult(result);
result.setDistinct();
result.setDistinct(distinctIndexes);
}
if (isGroupQuery && !isGroupSortedQuery) {
result = createLocalResult(result);
......@@ -687,7 +723,7 @@ public class Select extends Query {
if (isGroupQuery) {
throw DbException.getUnsupportedException(
"MVCC=TRUE && FOR UPDATE && GROUP");
} else if (distinct) {
} else if (isAnyDistinct()) {
throw DbException.getUnsupportedException(
"MVCC=TRUE && FOR UPDATE && DISTINCT");
} else if (isQuickAggregateQuery) {
......@@ -851,7 +887,7 @@ public class Select extends Query {
expandColumnList();
visibleColumnCount = expressions.size();
ArrayList<String> expressionSQL;
if (orderList != null || group != null) {
if (distinctExpressions != null || orderList != null || group != null) {
expressionSQL = new ArrayList<>(visibleColumnCount);
for (int i = 0; i < visibleColumnCount; i++) {
Expression expr = expressions.get(i);
......@@ -862,9 +898,23 @@ public class Select extends Query {
} else {
expressionSQL = null;
}
if (distinctExpressions != null) {
BitSet set = new BitSet();
for (Expression e : distinctExpressions) {
set.set(initExpression(session, expressions, expressionSQL, e, visibleColumnCount, false,
filters));
}
int idx = 0, cnt = set.cardinality();
distinctIndexes = new int[cnt];
for (int i = 0; i < cnt; i++) {
idx = set.nextSetBit(idx);
distinctIndexes[i] = idx;
idx++;
}
}
if (orderList != null) {
initOrder(session, expressions, expressionSQL, orderList,
visibleColumnCount, distinct, filters);
visibleColumnCount, isAnyDistinct(), filters);
}
distinctColumnCount = expressions.size();
if (having != null) {
......@@ -1198,8 +1248,17 @@ public class Select extends Query {
}
buff.resetCount();
buff.append("SELECT");
if (distinct) {
if (isAnyDistinct()) {
buff.append(" DISTINCT");
if (distinctExpressions != null) {
buff.append(" ON(");
for (Expression distinctExpression: distinctExpressions) {
buff.appendExceptFirst(", ");
buff.append(distinctExpression.getSQL());
}
buff.append(')');
buff.resetCount();
}
}
for (int i = 0; i < visibleColumnCount; i++) {
buff.appendExceptFirst(",");
......
......@@ -29,10 +29,15 @@ import org.h2.value.ValueArray;
class MVSortedTempResult extends MVTempResult {
/**
* Whether this result is distinct.
* Whether this result is a standard distinct result.
*/
private final boolean distinct;
/**
* Distinct indexes for DISTINCT ON results.
*/
private final int[] distinctIndexes;
/**
* Mapping of indexes of columns to its positions in the store, or {@code null}
* if columns are not reordered.
......@@ -45,11 +50,6 @@ class MVSortedTempResult extends MVTempResult {
*/
private final MVMap<ValueArray, Long> map;
/**
* The type of the distinct values.
*/
private final ValueDataType distinctType;
/**
* Optional index. This index is created only if result is distinct and
* {@code columnCount != distinctColumnCount} or if
......@@ -84,9 +84,9 @@ class MVSortedTempResult extends MVTempResult {
private MVSortedTempResult(MVSortedTempResult parent) {
super(parent);
this.distinct = parent.distinct;
this.distinctIndexes = parent.distinctIndexes;
this.indexes = parent.indexes;
this.map = parent.map;
this.distinctType = null;
this.rowCount = parent.rowCount;
}
......@@ -99,16 +99,19 @@ class MVSortedTempResult extends MVTempResult {
* column expressions
* @param distinct
* whether this result should be distinct
* @param distinctIndexes
* indexes of distinct columns for DISTINCT ON results
* @param visibleColumnCount
* count of visible columns
* @param sort
* sort order, or {@code null} if this result does not need any
* sorting
*/
MVSortedTempResult(Database database, Expression[] expressions, boolean distinct, int visibleColumnCount,
MVSortedTempResult(Database database, Expression[] expressions, boolean distinct, int[] distinctIndexes, int visibleColumnCount,
SortOrder sort) {
super(database, expressions.length, visibleColumnCount);
this.distinct = distinct;
this.distinctIndexes = distinctIndexes;
int length = columnCount;
int[] sortTypes = new int[length];
int[] indexes;
......@@ -166,15 +169,11 @@ class MVSortedTempResult extends MVTempResult {
ValueDataType keyType = new ValueDataType(database.getCompareMode(), database, sortTypes);
Builder<ValueArray, Long> builder = new MVMap.Builder<ValueArray, Long>().keyType(keyType);
map = store.openMap("tmp", builder);
if (length == visibleColumnCount) {
distinctType = null;
} else {
distinctType = new ValueDataType(database.getCompareMode(), database, new int[visibleColumnCount]);
if (distinct) {
Builder<ValueArray, Boolean> indexBuilder = new MVMap.Builder<ValueArray, Boolean>()
.keyType(distinctType);
index = store.openMap("idx", indexBuilder);
}
if (distinct && length != visibleColumnCount || distinctIndexes != null) {
int count = distinctIndexes != null ? distinctIndexes.length : visibleColumnCount;
ValueDataType distinctType = new ValueDataType(database.getCompareMode(), database, new int[count]);
Builder<ValueArray, Boolean> indexBuilder = new MVMap.Builder<ValueArray, Boolean>().keyType(distinctType);
index = store.openMap("idx", indexBuilder);
}
}
......@@ -182,8 +181,18 @@ class MVSortedTempResult extends MVTempResult {
public int addRow(Value[] values) {
assert parent == null;
ValueArray key = getKey(values);
if (distinct) {
if (columnCount != visibleColumnCount) {
if (distinct || distinctIndexes != null) {
if (distinctIndexes != null) {
int cnt = distinctIndexes.length;
Value[] newValues = new Value[cnt];
for (int i = 0; i < cnt; i++) {
newValues[i] = values[distinctIndexes[i]];
}
ValueArray distinctRow = ValueArray.get(newValues);
if (index.putIfAbsent(distinctRow, true) != null) {
return rowCount;
}
} else if (columnCount != visibleColumnCount) {
ValueArray distinctRow = ValueArray.get(Arrays.copyOf(values, visibleColumnCount));
if (index.putIfAbsent(distinctRow, true) != null) {
return rowCount;
......
......@@ -66,6 +66,8 @@ public abstract class MVTempResult implements ResultExternal {
* expressions
* @param distinct
* is output distinct
* @param distinctIndexes
* indexes of distinct columns for DISTINCT ON results
* @param visibleColumnCount
* count of visible columns
* @param sort
......@@ -73,9 +75,9 @@ public abstract class MVTempResult implements ResultExternal {
* @return temporary result
*/
public static ResultExternal of(Database database, Expression[] expressions, boolean distinct,
int visibleColumnCount, SortOrder sort) {
return distinct || sort != null
? new MVSortedTempResult(database, expressions, distinct, visibleColumnCount, sort)
int[] distinctIndexes, int visibleColumnCount, SortOrder sort) {
return distinct || distinctIndexes != null || sort != null
? new MVSortedTempResult(database, expressions, distinct, distinctIndexes, visibleColumnCount, sort)
: new MVPlainTempResult(database, expressions, visibleColumnCount);
}
......
......@@ -43,6 +43,7 @@ public class LocalResult implements ResultInterface, ResultTarget {
private int limit = -1;
private ResultExternal external;
private boolean distinct;
private int[] distinctIndexes;
private boolean closed;
private boolean containsLobs;
......@@ -150,6 +151,7 @@ public class LocalResult implements ResultInterface, ResultTarget {
copy.sort = this.sort;
copy.distinctRows = this.distinctRows;
copy.distinct = distinct;
copy.distinctIndexes = distinctIndexes;
copy.currentRow = null;
copy.offset = 0;
copy.limit = -1;
......@@ -170,10 +172,29 @@ public class LocalResult implements ResultInterface, ResultTarget {
* Remove duplicate rows.
*/
public void setDistinct() {
    // must not be combined with the index-based (DISTINCT ON) mode
    assert this.distinctIndexes == null;
    this.distinct = true;
    // rows are collected in a hash map so that duplicates collapse
    this.distinctRows = ValueHashMap.newInstance();
}
/**
 * Switch this result to index-based distinct mode: rows are considered
 * duplicates when their values in the given columns are equal.
 *
 * @param distinctIndexes indexes of the columns that define a duplicate
 */
public void setDistinct(int[] distinctIndexes) {
    // must not be combined with the plain distinct mode
    assert !this.distinct;
    this.distinctIndexes = distinctIndexes;
    this.distinctRows = ValueHashMap.newInstance();
}
/**
 * Tells whether duplicates are removed from this result, either over the
 * plain distinct flag or over a set of distinct column indexes.
 *
 * @return whether this result is a distinct result
 */
public boolean isAnyDistinct() {
    if (distinct) {
        return true;
    }
    return distinctIndexes != null;
}
/**
* Remove the row from the result set if it exists.
*
......@@ -208,7 +229,7 @@ public class LocalResult implements ResultInterface, ResultTarget {
if (distinctRows == null) {
distinctRows = ValueHashMap.newInstance();
for (Value[] row : rows) {
ValueArray array = getArrayOfVisible(row);
ValueArray array = getArrayOfDistinct(row);
distinctRows.put(array, array.getList());
}
}
......@@ -269,8 +290,15 @@ public class LocalResult implements ResultInterface, ResultTarget {
}
}
private ValueArray getArrayOfVisible(Value[] values) {
if (values.length > visibleColumnCount) {
private ValueArray getArrayOfDistinct(Value[] values) {
if (distinctIndexes != null) {
int cnt = distinctIndexes.length;
Value[] newValues = new Value[cnt];
for (int i = 0; i < cnt; i++) {
newValues[i] = values[distinctIndexes[i]];
}
values = newValues;
} else if (values.length > visibleColumnCount) {
values = Arrays.copyOf(values, visibleColumnCount);
}
return ValueArray.get(values);
......@@ -280,7 +308,8 @@ public class LocalResult implements ResultInterface, ResultTarget {
Database database = session.getDatabase();
external = database.isMVStore()
|| /* not supported by ResultTempTable */ distinct && expressions.length != visibleColumnCount
? MVTempResult.of(database, expressions, distinct, visibleColumnCount, sort)
|| distinctIndexes != null
? MVTempResult.of(database, expressions, distinct, distinctIndexes, visibleColumnCount, sort)
: new ResultTempTable(session, expressions, distinct, sort);
}
......@@ -292,10 +321,10 @@ public class LocalResult implements ResultInterface, ResultTarget {
@Override
public void addRow(Value[] values) {
cloneLobs(values);
if (distinct) {
if (isAnyDistinct()) {
if (distinctRows != null) {
ValueArray array = getArrayOfVisible(values);
distinctRows.put(array, values);
ValueArray array = getArrayOfDistinct(values);
distinctRows.putIfAbsent(array, values);
rowCount = distinctRows.size();
if (rowCount > maxMemoryRows) {
createExternalResult();
......@@ -334,7 +363,7 @@ public class LocalResult implements ResultInterface, ResultTarget {
if (external != null) {
addRowsToDisk();
} else {
if (distinct) {
if (isAnyDistinct()) {
rows = distinctRows.values();
}
if (sort != null) {
......
......@@ -66,7 +66,7 @@ public class ValueHashMap<V> extends HashBase {
if (k != null && k != ValueNull.DELETED) {
// skip the checkSizePut so we don't end up
// accidentally recursing
internalPut(k, oldValues[i]);
internalPut(k, oldValues[i], false);
}
}
}
......@@ -88,10 +88,21 @@ public class ValueHashMap<V> extends HashBase {
*/
public void put(Value key, V value) {
checkSizePut();
internalPut(key, value);
internalPut(key, value, false);
}
private void internalPut(Value key, V value) {
/**
 * Store the value under the given key unless the key is already present;
 * the value of an existing key is left untouched.
 *
 * @param key the key
 * @param value the value to store if the key is absent
 */
public void putIfAbsent(Value key, V value) {
    // make room first, exactly as put() does
    checkSizePut();
    final boolean keepExisting = true;
    internalPut(key, value, keepExisting);
}
private void internalPut(Value key, V value, boolean ifAbsent) {
int index = getIndex(key);
int plus = 1;
int deleted = -1;
......@@ -113,6 +124,9 @@ public class ValueHashMap<V> extends HashBase {
deleted = index;
}
} else if (k.equals(key)) {
if (ifAbsent) {
return;
}
// update existing
values[index] = value;
return;
......
......@@ -103,3 +103,49 @@ DROP TABLE TEST;
DROP TABLE TEST2;
> ok
CREATE TABLE TEST(C1 INT, C2 INT, C3 INT, C4 INT, C5 INT);
> ok
INSERT INTO TEST VALUES(1, 2, 3, 4, 5), (1, 2, 3, 6, 7), (2, 1, 4, 8, 9), (3, 4, 5, 1, 1);
> update count: 4
SELECT DISTINCT ON(C1, C2) C1, C2, C3, C4, C5 FROM TEST;
> C1 C2 C3 C4 C5
> -- -- -- -- --
> 1 2 3 4 5
> 2 1 4 8 9
> 3 4 5 1 1
> rows: 3
SELECT DISTINCT ON(C1 + C2) C1, C2, C3, C4, C5 FROM TEST;
> C1 C2 C3 C4 C5
> -- -- -- -- --
> 1 2 3 4 5
> 3 4 5 1 1
> rows: 2
SELECT DISTINCT ON(C1 + C2, C3) C1, C2, C3, C4, C5 FROM TEST;
> C1 C2 C3 C4 C5
> -- -- -- -- --
> 1 2 3 4 5
> 2 1 4 8 9
> 3 4 5 1 1
> rows: 3
SELECT DISTINCT ON(C1) C2 FROM TEST ORDER BY C1;
> C2
> --
> 2
> 1
> 4
> rows (ordered): 3
EXPLAIN SELECT DISTINCT ON(C1) C2 FROM TEST ORDER BY C1;
>> SELECT DISTINCT ON(C1) C2 FROM PUBLIC.TEST /* PUBLIC.TEST.tableScan */ ORDER BY =C1
SELECT DISTINCT ON(C1) C2 FROM TEST ORDER BY C3;
> exception ORDER_BY_NOT_IN_RESULT
DROP TABLE TEST;
> ok
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论